-- Filename: cordic_square_root_rotate_struct.vhd.vhd
-- Created by HDL-SCHEM-Editor at Sun Jan 19 09:13:34 2025
architecture struct of cordic_square_root_rotate is
    -- How many iterations can be executed before the root does not change anymore:
    -- The radicand is at start in the range   : 0.25...1.0
    -- The root will be at the end in the range: 0.50...1.0
    -- The hyperbolic cordic algorithm for calculating the square root uses these equations:
    --     x[0] = radicand + 0.36 > 0
    --     y[0] = radicand - 0.36
    --     for n in 1 to c_number_of_iterations
    --         if y[n-1]>0 then
    --             x[n] = x[n-1] - abs(y[n-1]*2**(-n)) > 0
    --             y[n] = y[n-1] - x[n-1]*2**(-n)
    --         else
    --             x[n] = x[n-1] - abs(y[n-1]*2**(-n)) > 0
    --             y[n] = y[n-1] + x[n-1]*2**(-n)
    -- The x-coordinate x[n] is always positive and decreased in each step.
    -- The y-coordinate y[n] can be positive and negative and is moved towards 0.
    -- The first x-coordinate x[0] is calculated by adding      0.36 to   the radicand which creates this range:  0.61<x[0]<1.36
    -- The first y-coordinate y[0] is calculated by subtracting 0.36 from the radicand which creates this range:  y[0]=x[0]-0.72  => -0.11<y[0]<0.64  
    -- delta_y[1] = x[0]*1/2      => 0.61*1/2<delta_y[1]<1.36*1/2 => 0.30<delta_y[1]<0.68
    -- delta_x[1] = abs(y[0]*1/2) => 0.00*1/2<delta_x[1]<0.64*1/2 => 0.00<delta_x[1]<0.32
    -- If y[0] is positive (0.72<x[0]<1.36) then the new maximum range of x[1] is: x[1] = x[0] - abs(y[0]*1/2) = x[0] - (x[0]*1/2 - 0.72*1/2) = 1/2*x[0] + 0.36 => 1/2*0.72+0.36<1/2*x[0]+0.36<1/2*1.36+0.36 => 0.72<x[1]<1.04
    -- If y[0] is negative (0.61<x[0]<0.72) then the new maximum range of x[1] is: x[1] = x[0] - abs(y[0]*1/2) = x[0] + (x[0]*1/2 - 0.72*1/2) = 3/2*x[0] - 0.36 => 3/2*0.61-0.36<3/2*x[0]-0.36<3/2*0.72-0.36 => 0.55<x[1]<0.72
    -- Both ranges added create this range for x[1]:  0.55<x[1]<1.04
    -- If y[0] is positive (0.72<x[0]<1.36) then the new maximum range of y[1] is: y[1] = y[0] - delta_y[1] = x[0]-0.72 - x[0]/2 = 1/2*x[0]-0.72 => 1/2*0.72-0.72<1/2*x[0]-0.72<1/2*1.36-0.72 => -0.36<y[1]<-0.04
    -- If y[0] is negative (0.61<x[0]<0.72) then the new maximum range of y[1] is: y[1] = y[0] + delta_y[1] = x[0]-0.72 + x[0]/2 = 3/2*x[0]-0.72 => 3/2*0.61-0.72<3/2*x[0]-0.72<3/2*0.72-0.72 =>  0.20<y[1]< 0.36
    -- Both ranges for y[1] cannot be joined to a single range: -0.36<y[1]<-0.04 V 0.20<y[1]<0.36
    -- delta_y[2] = x[1]*1/4      => 0.14<delta_y[2]<0.26
    -- delta_x[2] = abs(y[1]*1/4) => 0.00<delta_x[2]<0.09
    -- If y[1] is positive ( 0.20<y[1]< 0.36) then the new maximum range of y[2] is: y[2] = y[1] - delta_y[2] =>  0.20-0.26<y[2]< 0.36-0.14 => -0.06<y[2]<0.22
    -- If y[1] is negative (-0.36<y[1]<-0.04) then the new maximum range of y[2] is: y[2] = y[1] + delta_y[2] => -0.36+0.14<y[2]<-0.04+0.26 => -0.22<y[2]<0.22
    -- Both ranges added create this range for y[2]: -0.22<y[2]<0.22
    -- delta_x[3] = abs(y[2]*1/8) => 0.00..0.0275
    -- As can be seen, delta_x (which modifies the root) is reduced by a factor 4 in each iteration.
    -- This means a shift to the right by 2 bits in each iteration.
    -- As delta_x is calculated from the y-coordinate which has g_radicand_width_ext bits,
    -- delta_x will reach 0 after g_radicand_width_ext/2 iterations and the algorithm can be stopped: 
    -- Number of CORDIC iterations which are required for a complete calculation.
    -- NOTE(review): the derivation in the comment above argues with g_radicand_width_ext,
    -- whereas this constant is derived from g_radicand_width_even - confirm that this is intended.
    constant c_number_of_iterations : natural := g_radicand_width_even/2;

    -- Returns the number of iterations which are executed within one period:
    --   * g_latency_rotate_by_cordic=0 : all iterations are executed in one go.
    --   * otherwise                    : c_number_of_iterations divided by
    --                                    g_latency_rotate_by_cordic, rounded up.
    function calculate_nr_of_iterations_per_period return positive is
        variable v_quotient : natural;
    begin
        if g_latency_rotate_by_cordic=0 then
            return c_number_of_iterations;
        end if;
        -- Integer division truncates, so add 1 whenever the division leaves a remainder:
        v_quotient := c_number_of_iterations/g_latency_rotate_by_cordic;
        if c_number_of_iterations rem g_latency_rotate_by_cordic /= 0 then
            v_quotient := v_quotient + 1;
        end if;
        return v_quotient;
    end function;
    constant c_nr_of_iterations_per_period : positive := calculate_nr_of_iterations_per_period;

    -- Returns the total number of iterations which are really executed.
    -- With g_latency_rotate_by_cordic/=0 this is the number of iterations per period
    -- multiplied by the latency, i.e. c_number_of_iterations rounded up to the next
    -- multiple of g_latency_rotate_by_cordic.
    function calculate_nr_of_all_iterations return positive is
    begin
        if g_latency_rotate_by_cordic/=0 then
            return c_nr_of_iterations_per_period * g_latency_rotate_by_cordic;
        end if;
        return c_number_of_iterations;
    end function;
    constant c_nr_of_all_iterations : positive := calculate_nr_of_all_iterations;
    -- Number of periods which are needed to execute all iterations:
    constant c_nr_of_periods        : positive := c_nr_of_all_iterations/c_nr_of_iterations_per_period;

    -- Array of coordinate values; each element is a signed vector with
    -- g_radicand_width_ext+2 bits (index g_radicand_width_ext+1 downto 0).
    -- Used for the x- and y-coordinates which are passed from one step instance to the next.
    type t_signed_coord  is array (natural range <>) of signed(g_radicand_width_ext+1 downto 0);

    type t_natural_array is array (natural range <>) of natural range 0 to c_nr_of_all_iterations;

    -- Returns for each executed iteration (array index = iteration-1) the step number
    -- which has to be used in this iteration.
    -- The step number is the iteration number reduced by the number of step numbers which
    -- were already used twice. A step number is used twice each time the iteration number
    -- reaches (3**(k+1)+2*k-1)/2 for k=1,2,3,... (iterations 5, 15, 43, ...), which gives
    -- the sequence 1,2,3,4,4,5,...,12,13,13,14,...,39,40,40,41,...
    -- The complete sequence is reported when the function is executed.
    -- NOTE(review): if c_nr_of_all_iterations is smaller than 5, the last entry is equal to
    -- c_nr_of_all_iterations, which is outside of the range 0 to g_nr_of_all_iterations-1
    -- declared for port step_i of cordic_square_root_rotate_step - confirm that such small
    -- configurations are not used.
    function calculate_iteration_sequence return t_natural_array is
        variable v_sequence : t_natural_array(c_nr_of_all_iterations-1 downto 0);
        variable v_passed   : natural := 0; -- Number of thresholds which are already reached.
    begin
        for iteration in 1 to c_nr_of_all_iterations loop
            -- The first threshold (k=0) is reached at iteration 1, so v_passed is at least 1 afterwards:
            while 3**(v_passed+1)+2*v_passed-1<=2*iteration loop
                v_passed := v_passed + 1;
            end loop;
            v_sequence(iteration-1) := iteration - (v_passed - 1);
        end loop;
        for index in 0 to c_nr_of_all_iterations-1 loop
            report "iteration_sequence = " & integer'image(v_sequence(index));
        end loop;
        return v_sequence;
    end function;
    constant c_iteration_sequence : t_natural_array := calculate_iteration_sequence;

    type t_step is array (c_nr_of_periods-1 downto 0) of t_natural_array(c_nr_of_iterations_per_period-1 downto 0);

    -- Distributes c_iteration_sequence over the periods: each entry of the returned table
    -- holds the step numbers of all iterations which are executed within one period.
    -- As the step numbers are stored in a clocked process before they are used, the table
    -- is rotated by 1 position: the step numbers of period p are stored at index p-1 and
    -- the step numbers of period 0 are stored at the highest index c_nr_of_periods-1.
    function calculate_step return t_step is
        variable v_table  : t_step;
        variable v_target : natural range 0 to c_nr_of_periods-1;
    begin
        for period in 0 to c_nr_of_periods-1 loop
            -- Rotation by 1 position with wrap-around for period 0:
            v_target := (period + c_nr_of_periods - 1) mod c_nr_of_periods;
            for offset in 0 to c_nr_of_iterations_per_period-1 loop
                v_table(v_target)(offset) := c_iteration_sequence(c_nr_of_iterations_per_period*period + offset);
            end loop;
        end loop;
        return v_table;
    end function;
    constant c_step : t_step := calculate_step;
    -- Period counter, driven by cordic_square_root_control_inst; selects the entry of c_step which is loaded into steps:
    signal counter     : natural range 0 to c_nr_of_periods-1;
    -- Enables loading of steps, x_coord_reg and y_coord_reg (driven by cordic_square_root_control_inst):
    signal enable_reg  : std_logic;
    -- Selects radicand_p_i/radicand_m_i instead of the registered coordinates as input of the first step instance:
    signal first_step  : std_logic;
    -- Drives ready_cordic_o when g_latency_rotate_by_cordic/=0:
    signal ready_steps : std_logic;
    -- Step number for each of the step instances which are chained within one period:
    signal steps       : t_natural_array(c_nr_of_iterations_per_period-1 downto 0);
    -- x-coordinates of the chain: index 0 is the input of the first step instance, index i+1 is the output of step instance i:
    signal x_coord     : t_signed_coord(c_nr_of_iterations_per_period downto 0);
    -- Result of the last step instance (registered when g_latency_rotate_by_cordic/=0); source of square_root_cordic_o:
    signal x_coord_reg : signed(g_radicand_width_ext+1 downto 0);
    -- y-coordinates of the chain, organized like x_coord:
    signal y_coord     : t_signed_coord(c_nr_of_iterations_per_period downto 0);
    -- y-coordinate of the last step instance (registered when g_latency_rotate_by_cordic/=0, otherwise tied to 0):
    signal y_coord_reg : signed(g_radicand_width_ext+1 downto 0);
    -- Component declaration of the control module (the entity itself is not part of this file).
    -- It is instantiated below with g_counter_max = c_nr_of_periods-1 and delivers the period
    -- counter and the control signals enable_reg, first_step and ready_steps.
    -- NOTE(review): the exact timing of the outputs is defined by the entity - check it there.
    component cordic_square_root_control is
        generic (
            constant g_counter_max : natural := 16
        );
        port (
            clk_i          : in  std_logic;
            res_i          : in  std_logic;
            start_cordic_i : in  std_logic;
            counter_o      : out natural range 0 to g_counter_max;
            enable_reg_o   : out std_logic;
            first_step_o   : out std_logic;
            ready_steps_o  : out std_logic
        );
    end component;
    -- Component declaration of a single CORDIC step (the entity itself is not part of this file).
    -- The component has no clock port; it gets a step number and a pair of coordinates and
    -- delivers a new pair of coordinates.
    -- The coordinates have g_radicand_width_ext+2 bits, the step number is limited to the
    -- range 0 to g_nr_of_all_iterations-1.
    component cordic_square_root_rotate_step is
        generic (
            constant g_radicand_width_ext   : natural := 32;
            constant g_nr_of_all_iterations : natural := 16
        );
        port (
            step_i    : in  natural range 0 to g_nr_of_all_iterations-1;
            x_coord_i : in  signed(g_radicand_width_ext+1 downto 0);
            y_coord_i : in  signed(g_radicand_width_ext+1 downto 0);
            x_coord_o : out signed(g_radicand_width_ext+1 downto 0);
            y_coord_o : out signed(g_radicand_width_ext+1 downto 0)
        );
    end component;
begin
    -- Control module which delivers the period counter and the control signals.
    -- When g_latency_rotate_by_cordic=0, the signal ready_steps is not read at all and
    -- the signal enable_reg has no effect, because the process p_step ignores it in this
    -- case and the coordinate registers of clocked_g are not generated.
    -- The signal counter has the range 0 to 0 in this case (c_nr_of_periods=1), so it
    -- always has the correct value 0.
    cordic_square_root_control_inst : cordic_square_root_control
        generic map (
            g_counter_max => c_nr_of_periods-1
        )
        port map (
            clk_i          => clk_i,
            res_i          => res_i,
            start_cordic_i => start_cordic_i,
            counter_o      => counter,
            enable_reg_o   => enable_reg,
            first_step_o   => first_step,
            ready_steps_o  => ready_steps
        );
    -- Register for the step numbers of the step instances.
    -- At reset the highest entry of c_step is loaded, which holds the step numbers of
    -- period 0 (see calculate_step). When g_latency_rotate_by_cordic is 0 or 1, there is
    -- only this one entry and the register keeps its reset value forever.
    p_step: process (res_i, clk_i)
    begin
        if res_i='1' then
            steps <= c_step(c_step'high);
        elsif rising_edge(clk_i) then
            -- Constant condition, depends only on the generic:
            if g_latency_rotate_by_cordic/=0 and g_latency_rotate_by_cordic/=1 then
                if enable_reg='1' then
                    steps <= c_step(counter);
                end if;
            end if;
        end if;
    end process;
    -- Input of the first step instance: the registered coordinates are only used when the
    -- calculation needs more than 1 period and the current period is not the first one;
    -- in all other cases the coordinates come directly from the inputs.
    x_coord(0) <= x_coord_reg when first_step/='1' and g_latency_rotate_by_cordic/=0 and g_latency_rotate_by_cordic/=1 else radicand_p_i;
    y_coord(0) <= y_coord_reg when first_step/='1' and g_latency_rotate_by_cordic/=0 and g_latency_rotate_by_cordic/=1 else radicand_m_i;
    -- Chain of step instances: instance number "stage" reads the coordinates with index
    -- "stage" and drives the coordinates with index "stage+1", so the result of one period
    -- is available at index c_nr_of_iterations_per_period.
    rotate_by_cordic_step_g: for stage in 0 to c_nr_of_iterations_per_period-1 generate
        cordic_square_root_rotate_step_inst : cordic_square_root_rotate_step
            generic map (
                g_radicand_width_ext   => g_radicand_width_ext,
                g_nr_of_all_iterations => c_nr_of_all_iterations
            )
            port map (
                step_i    => steps(stage),
                x_coord_i => x_coord(stage),
                y_coord_i => y_coord(stage),
                x_coord_o => x_coord(stage+1),
                y_coord_o => y_coord(stage+1)
            );
    end generate rotate_by_cordic_step_g;
    -- Version without registers (g_latency_rotate_by_cordic=0):
    -- The start signal is directly used as ready signal and the result of the last step
    -- instance is passed on without a register. The y-coordinate is not fed back and
    -- therefore tied to 0.
    comb_g: if g_latency_rotate_by_cordic=0 generate
        ready_cordic_o <= start_cordic_i;
        x_coord_reg    <= x_coord(x_coord'high);
        y_coord_reg    <= (others => '0');
    end generate comb_g;
    -- Version with registers (g_latency_rotate_by_cordic/=0):
    -- The coordinates of the last step instance are stored when enable_reg is active.
    clocked_g: if g_latency_rotate_by_cordic/=0 generate
        p3: process (res_i, clk_i)
            variable v_x_last : signed(x_coord_reg'range);
            variable v_y_last : signed(y_coord_reg'range);
        begin
            if res_i='1' then
                x_coord_reg <= (others => '0');
                y_coord_reg <= (others => '0');
            elsif rising_edge(clk_i) then
                if enable_reg='1' then
                    v_x_last := x_coord(x_coord'high);
                    v_y_last := y_coord(y_coord'high);
                    -- Both registers are signed and both have an overflow bit for better readability.
                    -- But x_coord_reg is always positive and the overflow bit is only needed for
                    -- x_coord_reg. As synthesis cannot detect these circumstances, the highest bit
                    -- of each register is not stored, but created explicitly:
                    x_coord_reg <= '0' & v_x_last(v_x_last'high-1 downto 0);                      -- Always positive.
                    y_coord_reg <= v_y_last(v_y_last'high-1) & v_y_last(v_y_last'high-1 downto 0); -- Always 2 sign bits.
                end if;
            end if;
        end process;
        ready_cordic_o <= ready_steps;
    end generate clocked_g;
    -- The result is always positive, so the sign bit (the highest bit) is not passed on:
    square_root_cordic_o <= unsigned(x_coord_reg(x_coord_reg'high-1 downto 0));
end architecture;
