-- Filename: division_srt_radix2_step_pla.vhd.vhd
-- Created by HDL-SCHEM-Editor at Fri Sep 20 16:34:19 2024
architecture pla of division_srt_radix2_step is
    -- Unconstrained array of 2-bit vectors; it is the type of the quotient selection table
    -- (c_table) and the return type of init_table.
    type t_std_logic_2_array is array (natural range <>) of std_logic_vector(1 downto 0);
    -- Builds the 512-entry quotient selection table.
    -- Address layout (MSB first):
    --   bit  8         : sign bit of the divisor
    --   bits 7 downto 4: upper 4 bits of the sum (xor) part of 2*R(i)
    --   bits 3 downto 0: upper 4 bits of the carry part of 2*R(i)
    -- Entry encoding (bit 1 = "plus one", bit 0 = "minus one"):
    --   "00" : estimate is "0000" or "1111"              -> q =  0
    --   "10" : sign of estimate equals sign of divisor   -> q = +1
    --   "01" : sign of estimate differs from the divisor -> q = -1
    function init_table return t_std_logic_2_array is
        variable lut       : t_std_logic_2_array(511 downto 0);
        variable addr_bits : unsigned(8 downto 0);
        variable estimate  : unsigned(3 downto 0);
    begin
        for addr in lut'range loop
            addr_bits := to_unsigned(addr, addr_bits'length);
            -- 4-bit estimate of 2*R(i): sum field plus carry field, overflow discarded.
            estimate  := addr_bits(7 downto 4) + addr_bits(3 downto 0);
            if estimate = "0000" or estimate = "1111" then
                lut(addr) := "00";
            elsif addr_bits(8) = estimate(3) then
                lut(addr) := "10";
            else
                lut(addr) := "01";
            end if;
        end loop;
        return lut;
    end function;
    -- Quotient selection table, computed once by init_table and indexed by bits_to_check.
    constant c_table : t_std_logic_2_array(511 downto 0) := init_table;
    -- Table address: divisor sign bit, then the upper 4 bits of xor_vector_i and of carry_vector_i.
    signal bits_to_check : unsigned(8 downto 0);
    -- Carry part of this step's carry-save result, before it is doubled for carry_vector_o.
    signal carry_vector  : signed(g_op_width-2 downto 0);
    -- Table output: bit 1 drives quotient_bit_plus_one_o, bit 0 drives quotient_bit_minus_one_o.
    signal quotient_bits : std_logic_vector(1 downto 0);
    -- Sum (xor) part of this step's carry-save result, before it is doubled for xor_vector_o.
    signal xor_vector    : signed(g_op_width-1 downto 0);
begin
    -- Quotient bit selection by SRT algorithm:
    -- A radix2 SRT algorithm calculates the remainders R by this formula:
    --      R(i+1) = 2*R(i) - q(i)*Divisor           (1)
    --      with 2*R(0)=Dividend and q(i) in {-1,0,+1}
    -- The quotient bit q(i) must be selected such that this condition is always fulfilled:
    --      abs(R(i+1)) <= abs(Divisor)              (2)
    -- If q=0 is selected then equation (1) changes to
    --      R(i+1) = 2*R(i)                          (3)
    -- and condition (2) is only fulfilled, if abs(R(i)) is smaller than or equal to abs(Divisor/2).
    -- Therefore the condition for selecting q=0 is:
    --      abs(  R(i)) <= abs(Divisor/2)
    --      abs(2*R(i)) <= abs(Divisor  )            (4)
    -- This condition for selecting q=0 can be transformed:
    --      -abs(Divisor) <= 2*R(i) <= +abs(Divisor) (5)
    -- If 2*R(i) is outside the range of (5) and bigger than abs(Divisor), then 2*R(i) is positive and
    -- q=+1 must be selected for a positive divisor or q=-1 must be selected for a negative divisor in
    -- order to make R(i+1) smaller than abs(Divisor) in both cases.
    -- If 2*R(i) is outside the range of (5) and smaller than -abs(Divisor), then 2*R(i) is negative and
    -- q=-1 must be selected for a positive divisor or q=+1 must be selected for a negative divisor in
    -- order to make R(i+1) bigger than -abs(Divisor) in both cases.
    -- If 2*R(i) is inside the range of (5), q=0 is selected.
    -- As long as R(i) is always known exactly, there is no reason to select q=0, because condition (2)
    -- is always fulfilled by using q=+/-1 (see module division_signed from this website).
    --
    -- But the SRT algorithm was invented in order to handle values of R(i) which are only approximately known.
    -- In SRT division the operands (provided in 2's complement) are normalized, which means they always start with "01..." or "10...".
    -- When R(i+1) is calculated by R(i+1)=2*R(i) (in the case when q=0), then R(i+1) will have 1 bit more than R(i).
    -- In the next step, in order to calculate R(i+2), first 2*R(i+1) is calculated, which means this product
    -- will have 2 bits more than R(i) and the calculation of the R(i)'s must use 2 bits more than the divisor has.
    -- Therefore the divisor will have values in the form "0001xxx" or "1110xxx" at the addition in equation (1).
    -- In order to accelerate this addition, carry-save adders are used, which speeds up addition a lot.
    -- So when equation (5) is checked, 2*R(i) is only available in form of a sum-part and a carry-part.
    -- In order to get an approximate value of 2*R(i) quickly, only the upper 4 bits of the sum-part and the carry-part are added.
    -- When the approximate value of 2*R(i) is "1111" or "0000" (and for example 2*R(i) has 8 bits) the range of 2*R(i) can be estimated:
    --   If 2*R(i) starts with "1111", then: "11110000" <= 2*R(i) <= "00000000"       (upper limit calculated by "11110000"+"00010000", with "00010000" being the maximum error provided by the estimation)
    --   If 2*R(i) starts with "0000", then: "00000000" <= 2*R(i) <= "00010000"       (upper limit calculated by "00000000"+"00010000")
    -- Both cases together mean:             "11110000" <= 2*R(i) <= "00010000"
    -- But as stated above the divisor has values between:    "11100000" <= divisor <= "11101111" (divisor lower range)
    --                                                     or "00010000" <= divisor <= "00011111" (divisor upper range)
    -- The range of 2*R(i) and the ranges of the divisor can be ordered:
    --        divisor lower range:      range of 2*R(i):        divisor upper range:
    --      "11100000"..."11101111", "11110000"..."00010000", "00010000"..."00011111"
    -- So when 2*R(i) starts with "1111" or "0000" the equation (5) is fulfilled and q=0 can be selected without knowing the exact value of 2*R(i).
    
    -- Quotient selection: assemble the 9-bit table address field by field
    -- (divisor sign, upper 4 bits of the sum part, upper 4 bits of the carry part)
    -- and look up the 2-bit selection code.
    bits_to_check(8)          <= divisor_norm_i(divisor_norm_i'high);
    bits_to_check(7 downto 4) <= unsigned(xor_vector_i(g_op_width downto g_op_width-3));
    bits_to_check(3 downto 0) <= unsigned(carry_vector_i(g_op_width-1 downto g_op_width-4));

    quotient_bits <= c_table(to_integer(bits_to_check));

    -- Bit 0 of the selection code flags q=-1, bit 1 flags q=+1.
    quotient_bit_minus_one_o <= quotient_bits(0);
    quotient_bit_plus_one_o  <= quotient_bits(1);
    -- Carry-save step: adds three operands bit-wise without carry propagation.
    --   operand 1: the sum part (xor_vector_i),
    --   operand 2: the carry part shifted left by one bit, with the LSB used as carry-in,
    --   operand 3: selected by quotient_bits:
    --                "00"   -> zero               (add only the carry part)
    --                "10"   -> inverted divisor   (carry-in '1' completes the 2's complement)
    --                others -> divisor
    -- The new sum part is the bit-wise xor of the three operands; the new carry part is
    -- the bit-wise majority of their lower g_op_width-1 bits.
    process (quotient_bits, divisor_norm_i, xor_vector_i, carry_vector_i)
        variable sum_part   : signed(g_op_width-1 downto 0);
        variable carry_part : signed(g_op_width-1 downto 0);
        variable addend     : signed(g_op_width-1 downto 0);
    begin
        sum_part := xor_vector_i(g_op_width-1 downto 0);
        case quotient_bits is
            when "00" =>
                carry_part := carry_vector_i(g_op_width-2 downto 0) & '0';
                addend     := (others => '0');
            when "10" =>
                carry_part := carry_vector_i(g_op_width-2 downto 0) & '1';
                addend     := not divisor_norm_i;
            when others =>
                carry_part := carry_vector_i(g_op_width-2 downto 0) & '0';
                addend     := divisor_norm_i;
        end case;
        xor_vector   <= sum_part xor carry_part xor addend;
        carry_vector <= (sum_part  (g_op_width-2 downto 0) and carry_part(g_op_width-2 downto 0)) or
                        (sum_part  (g_op_width-2 downto 0) and addend    (g_op_width-2 downto 0)) or
                        (carry_part(g_op_width-2 downto 0) and addend    (g_op_width-2 downto 0));
    end process;
    -- Both result parts are shifted left by one bit (doubled) so that the outputs can be
    -- multiplexed between dividend and 2*R(i).
    carry_vector_o <= carry_vector & '0';
    xor_vector_o   <= xor_vector   & '0';
end architecture;
