diff --git a/tce/hdb/64bitTTA.hdb b/tce/hdb/64bitTTA.hdb new file mode 100644 index 0000000000..a211569362 Binary files /dev/null and b/tce/hdb/64bitTTA.hdb differ diff --git a/tce/hdb/vhdl/ALU64.vhdl b/tce/hdb/vhdl/ALU64.vhdl new file mode 100644 index 0000000000..13e6e80ac8 --- /dev/null +++ b/tce/hdb/vhdl/ALU64.vhdl @@ -0,0 +1,335 @@ +-- Copyright (c) 2002-2009 Tampere University of Technology. +-- +-- This file is part of TTA-Based Codesign Environment (TCE). +-- +-- Permission is hereby granted, free of charge, to any person obtaining a +-- copy of this software and associated documentation files (the "Software"), +-- to deal in the Software without restriction, including without limitation +-- the rights to use, copy, modify, merge, publish, distribute, sublicense, +-- and/or sell copies of the Software, and to permit persons to whom the +-- Software is furnished to do so, subject to the following conditions: +-- +-- The above copyright notice and this permission notice shall be included in +-- all copies or substantial portions of the Software. +-- +-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +-- FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +-- DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------- + +-- 64-BIT LARGE ALU (ALU64) for TTA64 Project. +-- Designer: Latif AKCAY +-- University: Bayburt University, Istanbul Technical University, TURKEY. 
+
+library IEEE;
+use IEEE.std_Logic_1164.all;
+use IEEE.numeric_std.all;
+-- 4-bit opcode encodings for the sixteen operations of the large 64-bit ALU.
+package opcodes_add64_and64_eq64_gt64_gtu64_ior64_ltu64_ne64_shl1add64_shl64_shr64_shru64_sub64_sxh64_sxw64_xor64 is
+
+  constant ADD64_OPC     : std_logic_vector(3 downto 0) := "0000";
+  constant AND64_OPC     : std_logic_vector(3 downto 0) := "0001";
+  constant EQ64_OPC      : std_logic_vector(3 downto 0) := "0010";
+  constant GT64_OPC      : std_logic_vector(3 downto 0) := "0011";
+  constant GTU64_OPC     : std_logic_vector(3 downto 0) := "0100";
+  constant IOR64_OPC     : std_logic_vector(3 downto 0) := "0101";
+  constant LTU64_OPC     : std_logic_vector(3 downto 0) := "0110";
+  constant NE64_OPC      : std_logic_vector(3 downto 0) := "0111";
+  constant SHL1ADD64_OPC : std_logic_vector(3 downto 0) := "1000";
+  constant SHL64_OPC     : std_logic_vector(3 downto 0) := "1001";
+  constant SHR64_OPC     : std_logic_vector(3 downto 0) := "1010";
+  constant SHRU64_OPC    : std_logic_vector(3 downto 0) := "1011";
+  constant SUB64_OPC     : std_logic_vector(3 downto 0) := "1100";
+  constant SXH64_OPC     : std_logic_vector(3 downto 0) := "1101";
+  constant SXW64_OPC     : std_logic_vector(3 downto 0) := "1110";
+  constant XOR64_OPC     : std_logic_vector(3 downto 0) := "1111";
+
+end opcodes_add64_and64_eq64_gt64_gtu64_ior64_ltu64_ne64_shl1add64_shl64_shr64_shru64_sub64_sxh64_sxw64_xor64;
+
+library IEEE;
+use IEEE.std_Logic_1164.all;
+use IEEE.numeric_std.all;
+use work.util.all;
+use work.opcodes_add64_and64_eq64_gt64_gtu64_ior64_ltu64_ne64_shl1add64_shl64_shr64_shru64_sub64_sxh64_sxw64_xor64.all;
+
+-- Declares the shifter behind SHL64, SHR64 and SHRU64.
+package monolithic_alu_shladd_large_shift_pkg is
+
+  function shift_func (input: std_logic_vector; shft_amount : std_logic_vector;
+                       opc : std_logic_vector;dataw : integer; shiftw : integer)
+    return std_logic_vector;
+end monolithic_alu_shladd_large_shift_pkg;
+
+package body monolithic_alu_shladd_large_shift_pkg is
+  -- Returns 'input' (dataw bits) shifted by the low 'shiftw' bits of 'shft_amount'; 'opc' picks the shift kind.
+  function shift_func (input: std_logic_vector; shft_amount : std_logic_vector;
+                       opc: std_logic_vector;dataw : integer; shiftw : integer)
+    return std_logic_vector is
+    -- Stage i shifts by 2**i, so the largest reachable shift is 2**shiftw - 1.
+    constant max_shift : integer := shiftw;
+    variable shift_in : std_logic;
+    type std_logic_vector_array is array (natural range <>) of std_logic_vector(dataw-1 downto 0);
+    variable y_temp : std_logic_vector_array (0 to max_shift);
+    variable y : std_logic_vector(dataw-1 downto 0);
+    variable shift_ammount : std_logic_vector(shiftw-1 downto 0);
+  begin
+    shift_ammount := shft_amount(shiftw-1 downto 0);
+    -- Right shifts go through the left shifter on flip_bits(input); flip_bits (work.util) presumably reverses bit order.
+    if ((opc = SHR64_OPC) or (opc = SHRU64_OPC)) then
+      y_temp(0) := flip_bits(input);
+    else
+      y_temp(0) := input;
+    end if;
+    -- Fill bit: SHR64 replicates bit 0 of the flipped operand, every other opcode shifts in '0'.
+    if (opc = SHR64_OPC) then
+      shift_in := y_temp(0)(0);
+    else
+      shift_in := '0';
+    end if;
+
+    -- Stage i moves the value up by 2**i positions when bit i of the shift amount is set.
+    for i in 0 to max_shift-1 loop
+      if (shift_ammount(i) = '1') then
+        y_temp(i+1) := (others => shift_in);
+        y_temp(i+1) (dataw-1 downto 2**i) := y_temp(i) (dataw-1-2**i downto 0);
+      else
+        y_temp(i+1) := y_temp(i);
+      end if;
+    end loop;  -- i
+    -- Undo the flip for the two right-shift opcodes.
+    if ( (opc = SHR64_OPC) or (opc = SHRU64_OPC)) then
+      y := flip_bits(y_temp(max_shift));
+    else
+      y := y_temp(max_shift);
+    end if;
+    return y;
+  end shift_func;
+end monolithic_alu_shladd_large_shift_pkg;
+
+library IEEE;
+use IEEE.numeric_std.all;
+use IEEE.std_logic_1164.all;
+use IEEE.std_logic_arith.all;
+use work.opcodes_add64_and64_eq64_gt64_gtu64_ior64_ltu64_ne64_shl1add64_shl64_shr64_shru64_sub64_sxh64_sxw64_xor64.all;
+use work.monolithic_alu_shladd_large_shift_pkg.all;
+-- Combinational ALU core: R is the result of operation OPC applied to A and B.
+entity add64_and64_eq64_gt64_gtu64_ior64_ltu64_ne64_shl1add64_shl64_shr64_shru64_sub64_sxh64_sxw64_xor64 is
+  generic (
+    dataw : integer := 64;
+    busw : integer := 64;
+    shiftw : integer := 6);  -- shift-amount bits; 6 are required to reach shifts of 32..63 on 64-bit data (was 5)
+  port(
+    A : in std_logic_vector(dataw-1 downto 0);
+    B : in std_logic_vector(dataw-1 downto 0);
+    OPC : in std_logic_vector(3 downto 0);
+    R : out std_logic_vector(dataw-1 downto 0)
+  );
+end add64_and64_eq64_gt64_gtu64_ior64_ltu64_ne64_shl1add64_shl64_shr64_shru64_sub64_sxh64_sxw64_xor64;
+
+-- NOTE(review): the compares test B against A while SUB64 computes A - B; confirm against the HDB operand bindings.
+architecture comb of add64_and64_eq64_gt64_gtu64_ior64_ltu64_ne64_shl1add64_shl64_shr64_shru64_sub64_sxh64_sxw64_xor64 is
+  signal add_op1 : std_logic_vector(dataw-1 downto 0);
+  signal add_result : std_logic_vector(dataw-1 downto 0);
+  signal shift_result : std_logic_vector(dataw-1 downto 0);
+  signal gt : std_logic_vector(0 downto 0);
+  signal gtu : std_logic_vector(0 downto 0);
+  signal eq : std_logic_vector(0 downto 0);
+  signal neq : std_logic_vector(0 downto 0);
+  signal ltu : std_logic_vector(0 downto 0);
+  signal cmp : std_logic_vector(0 downto 0);
+  signal cmp_ext : std_logic_vector(dataw-1 downto 0);
+begin
+  -- One-bit comparison flags; B is the left-hand operand of every ordering compare.
+  gt <= "1" when (ieee.numeric_std.signed(B) > ieee.numeric_std.signed(A)) else "0";
+  gtu <= "1" when (ieee.numeric_std.unsigned(B) > ieee.numeric_std.unsigned(A)) else "0";
+  eq <= "1" when (A=B) else "0";
+  neq <= "1" when (A/=B) else "0";
+  ltu <= "1" when (ieee.numeric_std.unsigned(B) < ieee.numeric_std.unsigned(A)) else "0";
+  -- First adder operand: A for ADD64, A shifted left by one for SHL1ADD64.
+  process (A,OPC)
+  begin
+    case OPC is
+      when ADD64_OPC =>
+        add_op1 <= A;
+      when SHL1ADD64_OPC =>
+        add_op1 <= A(dataw-2 downto 0)&'0';
+      when others =>  -- don't-care: add_result is only selected for ADD64 and SHL1ADD64
+        add_op1 <= A(dataw-3 downto 0)&"00";
+    end case;
+  end process;
+  -- Select the comparison flag requested by OPC.
+  process (A,B,OPC, eq, gt, gtu, neq, ltu)
+  begin
+    case OPC is
+      when EQ64_OPC =>
+        cmp <= eq;
+      when GT64_OPC =>
+        cmp <= gt;
+      when GTU64_OPC =>
+        cmp <= gtu;
+      when NE64_OPC =>
+        cmp <= neq;
+      when LTU64_OPC =>
+        cmp <= ltu;
+      when others =>  -- not selected by the result mux for any of the sixteen opcodes
+        cmp <= not gtu;
+    end case;
+  end process;
+
+  add_result <= std_logic_vector(ieee.numeric_std.signed(add_op1) + ieee.numeric_std.signed(B));
+  shift_result <= shift_func(B,A(shiftw-1 downto 0),OPC,dataw,shiftw);  -- B is shifted, A supplies the amount
+  cmp_ext <= ext(cmp, R'length);  -- zero-extend the flag to the result width
+  -- Result multiplexer.
+  process (A,B,OPC, add_result, shift_result, cmp_ext)
+  begin  -- process
+    case OPC is
+      when ADD64_OPC =>
+        R <= add_result;
+      when SHL1ADD64_OPC =>
+        R <= add_result;
+      when SUB64_OPC =>
+        R <= std_logic_vector(ieee.numeric_std.signed(A) - ieee.numeric_std.signed(B));
+      when EQ64_OPC =>
+        R <= cmp_ext;
+      when GT64_OPC =>
+        R <= cmp_ext;
+      when GTU64_OPC =>
+        R <= cmp_ext;
+      when SHL64_OPC =>
+        R <= shift_result;
+      when SHR64_OPC =>
+        R <= shift_result;
+      when SHRU64_OPC =>
+        R <= shift_result;
+      when AND64_OPC =>
+        R <= A and B;
+      when IOR64_OPC =>
+        R <= A or B;
+      when XOR64_OPC =>
+        R <= A xor B;
+      when SXH64_OPC =>
+        R <= SXT(A(15 downto 0), R'length);
+      when SXW64_OPC =>
+        R <= SXT(A(dataw/2-1 downto 0), R'length);
+      when others =>  -- NE64 and LTU64 land here; cmp already holds their flag
+        R <= cmp_ext;
+    end case;
+  end process;
+end comb;
+
+-------------------------------------------------------------------------------
+-- Entity declaration for unit add64_and64_eq64_gt64_gtu64_ior64_ltu64_ne64_shl1add64_shl64_shr64_shru64_sub64_sxh64_sxw64_xor64 latency 1
+-------------------------------------------------------------------------------
+
+library IEEE;
+use IEEE.std_Logic_1164.all;
+use IEEE.std_Logic_arith.all;
+
+-- FU wrapper: registers the operands and the opcode, then feeds them to the combinational ALU core.
+entity fu_add64_and64_eq64_gt64_gtu64_ior64_ltu64_ne64_shl1add64_shl64_shr64_shru64_sub64_sxh64_sxw64_xor64_always_1 is
+  generic (
+    dataw : integer := 64;  -- Operand Width
+    busw : integer := 64;  -- Bus Width
+    shiftw : integer := 6  -- Shift-amount bits; 6 covers shifts of 0..63 (was 5)
+  );
+
+  port (
+    clk : in std_logic;
+    rstx : in std_logic;
+    glock : in std_logic;
+    operation_in : in std_logic_vector(3 downto 0);
+    data_in1t_in : in std_logic_vector(dataw-1 downto 0);
+    load_in1t_in : in std_logic;
+    data_in2_in : in std_logic_vector(dataw-1 downto 0);
+    load_in2_in : in std_logic;
+    data_out1_out : out std_logic_vector(dataw-1 downto 0)
+  );
+
+end fu_add64_and64_eq64_gt64_gtu64_ior64_ltu64_ne64_shl1add64_shl64_shr64_shru64_sub64_sxh64_sxw64_xor64_always_1;
+
+architecture rtl of fu_add64_and64_eq64_gt64_gtu64_ior64_ltu64_ne64_shl1add64_shl64_shr64_shru64_sub64_sxh64_sxw64_xor64_always_1 is
+
+  component add64_and64_eq64_gt64_gtu64_ior64_ltu64_ne64_shl1add64_shl64_shr64_shru64_sub64_sxh64_sxw64_xor64
+    generic (
+      dataw : integer := 64;
+      shiftw : integer := 6);
+    port(
+      A : in std_logic_vector(dataw-1 downto 0);
+      B : in std_logic_vector(dataw-1 downto 0);
+      OPC : in std_logic_vector(3 downto 0);
+      R : out std_logic_vector(dataw-1 downto 0)
+    );
+  end component;
+
+  signal data_in1t_in_reg : std_logic_vector(dataw-1 downto 0);
+  signal data_in2_in_reg : std_logic_vector(dataw-1 downto 0);
+  signal data_in2_in_tempreg : std_logic_vector(dataw-1 downto 0);
+  signal data_out1_out_reg : std_logic_vector(dataw-1 downto 0);
+  signal opc_reg : std_logic_vector(3 downto 0);
+  signal control : std_logic_vector(1 downto 0);
+
+begin
+
+  fu_arch : add64_and64_eq64_gt64_gtu64_ior64_ltu64_ne64_shl1add64_shl64_shr64_shru64_sub64_sxh64_sxw64_xor64
+    generic map (
+      dataw => dataw,
+      shiftw => shiftw)
+    port map(
+      A => data_in1t_in_reg,
+      B => data_in2_in_reg,
+      OPC => opc_reg,
+      R => data_out1_out_reg
+    );
+
+  control <= load_in2_in & load_in1t_in;
+  -- Input registers; the opcode and both core operands only change when load_in1t_in is '1'.
+  regs : process (clk, rstx)
+  begin  -- process regs
+    if rstx = '0' then  -- asynchronous reset (active low)
+      data_in1t_in_reg <= (others => '0');
+      data_in2_in_reg <= (others => '0');
+      data_in2_in_tempreg <= (others => '0');
+      opc_reg <= (others => '0');
+
+    elsif clk'event and clk = '1' then  -- rising clock edge
+      if (glock = '0') then
+
+        case control is
+          when "11" =>  -- both ports loaded: use the fresh in2 value directly
+            data_in1t_in_reg <= data_in1t_in;
+            data_in2_in_reg <= data_in2_in;
+            data_in2_in_tempreg <= data_in2_in;
+            opc_reg <= operation_in;
+          when "10" =>  -- in2 only: park the operand in the temporary register
+            data_in2_in_tempreg <= data_in2_in;
+          when "01" =>  -- in1t only: pair it with the previously parked in2 value
+            opc_reg <= operation_in;
+            data_in1t_in_reg <= data_in1t_in;
+            data_in2_in_reg <= data_in2_in_tempreg;
+          when others => null;
+        end case;
+
+      end if;
+    end if;
+  end process regs;
+  -- NOTE(review): data_out1_out is dataw bits wide, so the busw < dataw branch leaves bits dataw-1..busw undriven.
+  process (data_out1_out_reg)
+  begin  -- process
+    if busw < dataw then
+      if busw > 1 then
+        data_out1_out(busw-1) <= data_out1_out_reg(dataw-1);
+        data_out1_out(busw-2 downto 0) <= data_out1_out_reg(busw-2 downto 0);
+      else
+        data_out1_out(0) <= data_out1_out_reg(0);
+      end if;
+    else
+      data_out1_out <= sxt(data_out1_out_reg,data_out1_out_reg'length);
+    end if;
+  end process;
+
+end rtl;
+
+diff --git 
a/tce/hdb/vhdl/ALU64_1.vhdl b/tce/hdb/vhdl/ALU64_1.vhdl new file mode 100644 index 0000000000..e6af5ba1f7 --- /dev/null +++ b/tce/hdb/vhdl/ALU64_1.vhdl @@ -0,0 +1,260 @@ +-- Copyright (c) 2002-2009 Tampere University of Technology. +-- +-- This file is part of TTA-Based Codesign Environment (TCE). +-- +-- Permission is hereby granted, free of charge, to any person obtaining a +-- copy of this software and associated documentation files (the "Software"), +-- to deal in the Software without restriction, including without limitation +-- the rights to use, copy, modify, merge, publish, distribute, sublicense, +-- and/or sell copies of the Software, and to permit persons to whom the +-- Software is furnished to do so, subject to the following conditions: +-- +-- The above copyright notice and this permission notice shall be included in +-- all copies or substantial portions of the Software. +-- +-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +-- FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +-- DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------- + +-- 64-BIT SMALL ALU (ALU64-1) for TTA64 Project. +-- Designer: Latif AKCAY +-- University: Bayburt University, Istanbul Technical University, TURKEY. 
+
+library IEEE;
+use IEEE.std_Logic_1164.all;
+use IEEE.numeric_std.all;
+-- 3-bit opcode encodings for the small 64-bit ALU.
+package opcodes_add64_and64_shl64_shru64_sub64_sxh64_xor64 is
+  -- NOTE(review): the unit name lists sub64, yet no SUB64 opcode is defined or implemented in this file - confirm.
+  constant ADD64_OPC  : std_logic_vector(2 downto 0) := "000";
+  constant AND64_OPC  : std_logic_vector(2 downto 0) := "001";
+  constant SHL64_OPC  : std_logic_vector(2 downto 0) := "010";
+  constant SHRU64_OPC : std_logic_vector(2 downto 0) := "011";
+  constant SXH64_OPC  : std_logic_vector(2 downto 0) := "100";
+  constant XOR64_OPC  : std_logic_vector(2 downto 0) := "101";
+
+end opcodes_add64_and64_shl64_shru64_sub64_sxh64_xor64;
+
+library IEEE;
+use IEEE.std_Logic_1164.all;
+use IEEE.numeric_std.all;
+use work.util.all;
+use work.opcodes_add64_and64_shl64_shru64_sub64_sxh64_xor64.all;
+-- Declares the shifter behind SHL64 and SHRU64.
+package monolithic_alu_shladd_large_shift_pkg_2 is
+
+  function shift_func (input: std_logic_vector; shft_amount : std_logic_vector;
+                       opc : std_logic_vector;dataw : integer; shiftw : integer)
+    return std_logic_vector;
+end monolithic_alu_shladd_large_shift_pkg_2;
+
+package body monolithic_alu_shladd_large_shift_pkg_2 is
+  -- Returns 'input' (dataw bits) shifted by the low 'shiftw' bits of 'shft_amount'; 'opc' picks left or right.
+  function shift_func (input: std_logic_vector; shft_amount : std_logic_vector;
+                       opc: std_logic_vector;dataw : integer; shiftw : integer)
+    return std_logic_vector is
+    -- Stage i shifts by 2**i, so the largest reachable shift is 2**shiftw - 1.
+    constant max_shift : integer := shiftw;
+    variable shift_in : std_logic;
+    type std_logic_vector_array is array (natural range <>) of std_logic_vector(dataw-1 downto 0);
+    variable y_temp : std_logic_vector_array (0 to max_shift);
+    variable y : std_logic_vector(dataw-1 downto 0);
+    variable shift_ammount : std_logic_vector(shiftw-1 downto 0);
+  begin
+    shift_ammount := shft_amount(shiftw-1 downto 0);
+    -- Both supported shifts fill with zeros, so the fill bit no longer needs a per-branch assignment.
+    shift_in := '0';
+    -- SHRU64 goes through the left shifter on flip_bits(input); flip_bits (work.util) presumably reverses bit order.
+    if (opc = SHRU64_OPC) then
+      y_temp(0) := flip_bits(input);
+    else
+      y_temp(0) := input;
+    end if;
+
+    -- Stage i moves the value up by 2**i positions when bit i of the shift amount is set.
+    for i in 0 to max_shift-1 loop
+      if (shift_ammount(i) = '1') then
+        y_temp(i+1) := (others => shift_in);
+        y_temp(i+1) (dataw-1 downto 2**i) := y_temp(i) (dataw-1-2**i downto 0);
+      else
+        y_temp(i+1) := y_temp(i);
+      end if;
+    end loop;  -- i
+    -- Undo the flip for the right shift.
+    if ( opc = SHRU64_OPC ) then
+      y := flip_bits(y_temp(max_shift));
+    else
+      y := y_temp(max_shift);
+    end if;
+    return y;
+  end shift_func;
+end monolithic_alu_shladd_large_shift_pkg_2;
+
+library IEEE;
+use IEEE.numeric_std.all;
+use IEEE.std_logic_1164.all;
+use IEEE.std_logic_arith.all;
+use work.opcodes_add64_and64_shl64_shru64_sub64_sxh64_xor64.all;
+use work.monolithic_alu_shladd_large_shift_pkg_2.all;
+-- Combinational ALU core: R is the result of operation OPC applied to A and B.
+entity add64_and64_shl64_shru64_sub64_sxh64_xor64 is
+  generic (
+    dataw : integer := 64;
+    busw : integer := 64;
+    shiftw : integer := 6);  -- shift-amount bits; 6 are required to reach shifts of 32..63 on 64-bit data (was 5)
+  port(
+    A : in std_logic_vector(dataw-1 downto 0);
+    B : in std_logic_vector(dataw-1 downto 0);
+    OPC : in std_logic_vector(2 downto 0);
+    R : out std_logic_vector(dataw-1 downto 0)
+  );
+end add64_and64_shl64_shru64_sub64_sxh64_xor64;
+
+-- Adder and shifter run in parallel; the process below only selects which result drives R.
+architecture comb of add64_and64_shl64_shru64_sub64_sxh64_xor64 is
+  signal add_result : std_logic_vector(dataw-1 downto 0);
+  signal shift_result : std_logic_vector(dataw-1 downto 0);
+
+begin
+
+  add_result <= std_logic_vector(ieee.numeric_std.signed(A) + ieee.numeric_std.signed(B));
+  shift_result <= shift_func(B,A(shiftw-1 downto 0),OPC,dataw,shiftw);  -- B is shifted, A supplies the amount
+  -- Result multiplexer.
+  process (A,B,OPC, add_result, shift_result)
+  begin  -- process
+    case OPC is
+      when ADD64_OPC =>
+        R <= add_result;
+      when SHL64_OPC =>
+        R <= shift_result;
+      when SHRU64_OPC =>
+        R <= shift_result;
+      when AND64_OPC =>
+        R <= A and B;
+      when XOR64_OPC =>
+        R <= A xor B;
+      when SXH64_OPC =>
+        R <= SXT(A(15 downto 0), R'length);
+      when others =>  -- opcodes "110" and "111" are unassigned and yield zero
+        R <= (others => '0');
+    end case;
+  end process;
+end comb;
+
+-------------------------------------------------------------------------------
+-- Entity declaration for unit add64_and64_shl64_shru64_sub64_sxh64_xor64 latency 1
+-------------------------------------------------------------------------------
+
+library IEEE;
+use IEEE.std_Logic_1164.all;
+use IEEE.std_Logic_arith.all;
+
+-- FU wrapper: registers the operands and the opcode, then feeds them to the combinational ALU core.
+entity fu_add64_and64_shl64_shru64_sub64_sxh64_xor64_always_1 is
+  generic (
+    dataw : integer := 64;  -- Operand Width
+    busw : integer := 64;  -- Bus Width
+    shiftw : integer := 6  -- Shift-amount bits; 6 covers shifts of 0..63 (was 5)
+  );
+
+  port (
+    clk : in std_logic;
+    rstx : in std_logic;
+    glock : in std_logic;
+    operation_in : in std_logic_vector(2 downto 0);
+    data_in1t_in : in std_logic_vector(dataw-1 downto 0);
+    load_in1t_in : in std_logic;
+    data_in2_in : in std_logic_vector(dataw-1 downto 0);
+    load_in2_in : in std_logic;
+    data_out1_out : out std_logic_vector(dataw-1 downto 0)
+  );
+
+end fu_add64_and64_shl64_shru64_sub64_sxh64_xor64_always_1;
+
+architecture rtl of fu_add64_and64_shl64_shru64_sub64_sxh64_xor64_always_1 is
+
+  component add64_and64_shl64_shru64_sub64_sxh64_xor64
+    generic (
+      dataw : integer := 64;
+      shiftw : integer := 6);
+    port(
+      A : in std_logic_vector(dataw-1 downto 0);
+      B : in std_logic_vector(dataw-1 downto 0);
+      OPC : in std_logic_vector(2 downto 0);
+      R : out std_logic_vector(dataw-1 downto 0));
+  end component;
+
+  signal data_in1t_in_reg : std_logic_vector(dataw-1 downto 0);
+  signal data_in2_in_reg : std_logic_vector(dataw-1 downto 0);
+  signal data_in2_in_tempreg : std_logic_vector(dataw-1 downto 0);
+  signal data_out1_out_reg : std_logic_vector(dataw-1 downto 0);
+  signal opc_reg : std_logic_vector(2 downto 0);
+  signal control : std_logic_vector(1 downto 0);
+
+begin
+
+  fu_arch : add64_and64_shl64_shru64_sub64_sxh64_xor64
+    generic map (
+      dataw => dataw,
+      shiftw => shiftw)
+    port map(
+      A => data_in1t_in_reg,
+      B => data_in2_in_reg,
+      OPC => opc_reg,
+      R => data_out1_out_reg
+    );
+
+  control <= load_in2_in & load_in1t_in;
+  -- Input registers; the opcode and both core operands only change when load_in1t_in is '1'.
+  regs : process (clk, rstx)
+  begin  -- process regs
+    if rstx = '0' then  -- asynchronous reset (active low)
+      data_in1t_in_reg <= (others => '0');
+      data_in2_in_reg <= (others => '0');
+      data_in2_in_tempreg <= (others => '0');
+      opc_reg <= (others => '0');
+
+    elsif clk'event and clk = '1' then  -- rising clock edge
+      if (glock = '0') then
+
+        case control is
+          when "11" =>  -- both ports loaded: use the fresh in2 value directly
+            data_in1t_in_reg <= data_in1t_in;
+            data_in2_in_reg <= data_in2_in;
+            data_in2_in_tempreg <= data_in2_in;
+            opc_reg <= operation_in;
+          when "10" =>  -- in2 only: park the operand in the temporary register
+            data_in2_in_tempreg <= data_in2_in;
+          when "01" =>  -- in1t only: pair it with the previously parked in2 value
+            opc_reg <= operation_in;
+            data_in1t_in_reg <= data_in1t_in;
+            data_in2_in_reg <= data_in2_in_tempreg;
+          when others => null;
+        end case;
+
+      end if;
+    end if;
+  end process regs;
+  -- NOTE(review): data_out1_out is dataw bits wide, so the busw < dataw branch leaves bits dataw-1..busw undriven.
+  process (data_out1_out_reg)
+  begin  -- process
+    if busw < dataw then
+      if busw > 1 then
+        data_out1_out(busw-1) <= data_out1_out_reg(dataw-1);
+        data_out1_out(busw-2 downto 0) <= data_out1_out_reg(busw-2 downto 0);
+      else
+        data_out1_out(0) <= data_out1_out_reg(0);
+      end if;
+    else
+      data_out1_out <= sxt(data_out1_out_reg,data_out1_out_reg'length);
+    end if;
+  end process;
+
+end rtl;
+
+
diff --git a/tce/hdb/vhdl/LSU64.vhdl b/tce/hdb/vhdl/LSU64.vhdl
new file mode 100644
index 0000000000..9754978809
--- /dev/null
+++ b/tce/hdb/vhdl/LSU64.vhdl
@@ -0,0 +1,470 @@
+-- Copyright (c) 2002-2009 Tampere University of Technology.
+--
+-- This file is part of TTA-Based Codesign Environment (TCE).
+--
+-- Permission is hereby granted, free of charge, to any person obtaining a
+-- copy of this software and associated documentation files (the "Software"),
+-- to deal in the Software without restriction, including without limitation
+-- the rights to use, copy, modify, merge, publish, distribute, sublicense,
+-- and/or sell copies of the Software, and to permit persons to whom the
+-- Software is furnished to do so, subject to the following conditions:
+--
+-- The above copyright notice and this permission notice shall be included in
+-- all copies or substantial portions of the Software.
+--
+-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +-- FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +-- DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------- + +-- 64-BIT Little-Endian Load-Store Unit for TTA64 Project. +-- Designer: Latif AKCAY +-- University: Bayburt University, Istanbul Technical University, TURKEY. + +-- LSU64 Operations +-- ld16 : 0 +-- ld32 : 1 +-- ld64 : 2 +-- ld8 : 3 +-- ldu16 : 4 +-- ldu32 : 5 +-- ldu8 : 6 +-- st16 : 7 +-- st32 : 8 +-- st64 : 9 +-- st8 : 10 + + +package ld8_ld16_ld32_ldu8_ldu16_ldu32_ld64_st8_st16_st32_st64_opcodes is + + constant OPC_LD16 : integer := 0; + constant OPC_LD32 : integer := 1; + constant OPC_LD64 : integer := 2; + constant OPC_LD8 : integer := 3; + constant OPC_LDU16 : integer := 4; + constant OPC_LDU32 : integer := 5; + constant OPC_LDU8 : integer := 6; + constant OPC_ST16 : integer := 7; + constant OPC_ST32 : integer := 8; + constant OPC_ST64 : integer := 9; + constant OPC_ST8 : integer := 10; + +end ld8_ld16_ld32_ldu8_ldu16_ldu32_ld64_st8_st16_st32_st64_opcodes; + +library IEEE; +use IEEE.std_logic_1164.all; +use IEEE.std_logic_arith.all; +use work.ld8_ld16_ld32_ldu8_ldu16_ldu32_ld64_st8_st16_st32_st64_opcodes.all; + +entity lsu_ld8_ld16_ld32_ldu8_ldu16_ldu32_ld64_st8_st16_st32_st64_le3 is + generic ( + dataw : integer := 64; + addrw : integer := 16); + port( + -- socket interfaces: + t1data : in std_logic_vector(addrw-1 downto 0); + t1load : in std_logic; + t1opcode : in std_logic_vector(3 downto 0); + -- CHANGE + o1data : in std_logic_vector(dataw-1 downto 0); + o1load : in std_logic; + r1data : out std_logic_vector(dataw-1 downto 0); + -- external memory unit interface: + data_in : in std_logic_vector(dataw-1 downto 0); + data_out : out std_logic_vector(dataw-1 downto 0); + addr : out 
std_logic_vector(addrw-3-1 downto 0); + -- control signals + mem_en_x : out std_logic_vector(0 downto 0); -- active low + wr_en_x : out std_logic_vector(0 downto 0); -- active low + wr_mask_x : out std_logic_vector(dataw-1 downto 0); + + -- control signals: + glock : in std_logic; + clk : in std_logic; + rstx : in std_logic + ); +end lsu_ld8_ld16_ld32_ldu8_ldu16_ldu32_ld64_st8_st16_st32_st64_le3; + +architecture rtl of lsu_ld8_ld16_ld32_ldu8_ldu16_ldu32_ld64_st8_st16_st32_st64_le3 is + + type reg_array is array (natural range <>) of std_logic_vector(5 downto 0); + + signal addr_reg : std_logic_vector(addrw-3-1 downto 0); + signal data_out_reg : std_logic_vector(dataw-1 downto 0); + signal wr_en_x_reg : std_logic_vector(0 downto 0); + signal mem_en_x_reg : std_logic_vector(0 downto 0); + signal wr_mask_x_reg : std_logic_vector(dataw-1 downto 0); + + signal status_addr_reg : reg_array(1 downto 0); + + signal t1data_lower_3 : std_logic_vector(2 downto 0); + signal t1data_lower_2 : std_logic_vector(1 downto 0); + + -- information on the word (lsw/msw) needed in register + signal o1shadow_reg : std_logic_vector(dataw-1 downto 0); + signal r1_reg : std_logic_vector(dataw-1 downto 0); + + constant NOT_LOAD : std_logic_vector(2 downto 0) := "000"; + constant LD32 : std_logic_vector(2 downto 0) := "001"; + constant LD16 : std_logic_vector(2 downto 0) := "010"; + constant LD8 : std_logic_vector(2 downto 0) := "011"; + constant LDU16 : std_logic_vector(2 downto 0) := "100"; + constant LDU32 : std_logic_vector(2 downto 0) := "101"; + constant LD64 : std_logic_vector(2 downto 0) := "110"; + constant LDU8 : std_logic_vector(2 downto 0) := "111"; + + constant MSHW_MASK_LITTLE_ENDIAN : std_logic_vector := "1111111111111111111111111111111100000000000000000000000000000000"; + constant LSHW_MASK_LITTLE_ENDIAN : std_logic_vector := "0000000000000000000000000000000011111111111111111111111111111111"; + + constant ONES : std_logic_vector := "11111111"; + constant ZEROS : std_logic_vector 
:= "00000000"; + + constant SIZE_OF_BYTE : integer := 8; + constant SIZE_OF_HW : integer := 16; + + begin + + t1data_lower_3 <= t1data(2 downto 0); + t1data_lower_2 <= t1data(2 downto 1); + + seq : process (clk, rstx) + variable opc : integer; + variable idx : integer; + begin -- process seq + + if rstx = '0' then -- asynchronous reset (active low) + addr_reg <= (others => '0'); + data_out_reg <= (others => '0'); + -- use preset instead of reset + wr_en_x_reg(0) <= '1'; + mem_en_x_reg(0) <= '1'; + wr_mask_x_reg <= (others => '1'); + idx := 1; -- status_addr_reg'length-1; + for idx in 1 downto 0 loop + status_addr_reg(idx) <= (others => '0'); + end loop; -- idx + + o1shadow_reg <= (others => '0'); + r1_reg <= (others => '0'); + + elsif clk'event and clk = '1' then -- rising clock edge + if glock = '0' then + + if t1load = '1' then + opc := conv_integer(unsigned(t1opcode)); + case opc is + when OPC_LD32 => + status_addr_reg(0) <= LD32 & t1data_lower_3; + addr_reg <= t1data(addrw-1 downto 3); + mem_en_x_reg(0) <= '0'; + wr_en_x_reg(0) <= '1'; + -- wr_mask_x_reg <= (others => '1'); + when OPC_LD16 => + status_addr_reg(0) <= LD16 & t1data_lower_3; + addr_reg <= t1data(addrw-1 downto 3); + mem_en_x_reg(0) <= '0'; + wr_en_x_reg(0) <= '1'; + -- wr_mask_x_reg <= (others => '1'); + when OPC_LD8 => + status_addr_reg(0) <= LD8 & t1data_lower_3; + addr_reg <= t1data(addrw-1 downto 3); + mem_en_x_reg(0) <= '0'; + wr_en_x_reg(0) <= '1'; + -- wr_mask_x_reg <= (others => '1'); + when OPC_LDU16 => + status_addr_reg(0) <= LDU16 & t1data_lower_3; + addr_reg <= t1data(addrw-1 downto 3); + mem_en_x_reg(0) <= '0'; + wr_en_x_reg(0) <= '1'; + -- wr_mask_x_reg <= (others => '1'); + when OPC_LDU32 => + status_addr_reg(0) <= LDU32 & t1data_lower_3; + addr_reg <= t1data(addrw-1 downto 3); + mem_en_x_reg(0) <= '0'; + wr_en_x_reg(0) <= '1'; + -- wr_mask_x_reg <= (others => '1'); + when OPC_LD64 => + status_addr_reg(0) <= LD64 & t1data_lower_3; + addr_reg <= t1data(addrw-1 downto 3); + 
mem_en_x_reg(0) <= '0'; + wr_en_x_reg(0) <= '1'; + -- wr_mask_x_reg <= (others => '1'); + when OPC_LDU8 => + status_addr_reg(0) <= LDU8 & t1data_lower_3; + addr_reg <= t1data(addrw-1 downto 3); + mem_en_x_reg(0) <= '0'; + wr_en_x_reg(0) <= '1'; + -- wr_mask_x_reg <= (others => '1'); + + when OPC_ST64 => + status_addr_reg(0)(5 downto 3) <= NOT_LOAD; + if o1load = '1' then + data_out_reg <= o1data; + else + data_out_reg <= o1shadow_reg; + end if; + mem_en_x_reg(0) <= '0'; + wr_en_x_reg(0) <= '0'; + wr_mask_x_reg <= (others => '0'); + addr_reg <= t1data(addrw-1 downto 3); + when OPC_ST32 => + status_addr_reg(0)(5 downto 3) <= NOT_LOAD; + -- endianes dependent code + -- DEFAULT ENDIANESS + -- little endian + addr_reg <= t1data(addrw-1 downto 3); + if o1load = '1' then + if t1data(2) = '0' then + wr_mask_x_reg <= MSHW_MASK_LITTLE_ENDIAN; + data_out_reg <= ZEROS&ZEROS&ZEROS&ZEROS&o1data(dataw/2-1 downto 0); + else + wr_mask_x_reg <= LSHW_MASK_LITTLE_ENDIAN; + data_out_reg <= o1data(dataw/2-1 downto 0)&ZEROS&ZEROS&ZEROS&ZEROS; + end if; + else + -- endianes dependent code + if t1data(2) = '0' then + wr_mask_x_reg <= MSHW_MASK_LITTLE_ENDIAN; + data_out_reg <= ZEROS&ZEROS&ZEROS&ZEROS&o1shadow_reg(dataw/2-1 downto 0); + else + wr_mask_x_reg <= LSHW_MASK_LITTLE_ENDIAN; + data_out_reg <= o1shadow_reg(dataw/2-1 downto 0)&ZEROS&ZEROS&ZEROS&ZEROS; + end if; + end if; + mem_en_x_reg(0) <= '0'; + wr_en_x_reg(0) <= '0'; + + when OPC_ST16 => + status_addr_reg(0)(5 downto 3) <= NOT_LOAD; + -- endianes dependent code + -- DEFAULT ENDIANESS + -- little endian + -- Byte # + -- |3|2|1|0| + addr_reg <= t1data(addrw-1 downto 3); + if o1load = '1' then + case t1data_lower_2 is + -- endianes dependent code + when "11" => + wr_mask_x_reg <= ZEROS&ZEROS&ONES&ONES&ONES&ONES&ONES&ONES; + data_out_reg <= o1data(SIZE_OF_HW-1 downto 0)&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS; + when "10" => + wr_mask_x_reg <= ONES&ONES&ZEROS&ZEROS&ONES&ONES&ONES&ONES; + data_out_reg <= ZEROS&ZEROS&o1data(SIZE_OF_HW-1 
downto 0)&ZEROS&ZEROS&ZEROS&ZEROS; + when "01" => + wr_mask_x_reg <= ONES&ONES&ONES&ONES&ZEROS&ZEROS&ONES&ONES; + data_out_reg <= ZEROS&ZEROS&ZEROS&ZEROS&o1data(SIZE_OF_HW-1 downto 0)&ZEROS&ZEROS; + when others => + wr_mask_x_reg <= ONES&ONES&ONES&ONES&ONES&ONES&ZEROS&ZEROS; + data_out_reg <= ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&o1data(SIZE_OF_HW-1 downto 0); + end case; + else + case t1data_lower_2 is + -- endianes dependent code + when "11" => + wr_mask_x_reg <= ZEROS&ZEROS&ONES&ONES&ONES&ONES&ONES&ONES; + data_out_reg <= o1shadow_reg(SIZE_OF_HW-1 downto 0)&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS; + when "10" => + wr_mask_x_reg <= ONES&ONES&ZEROS&ZEROS&ONES&ONES&ONES&ONES; + data_out_reg <= ZEROS&ZEROS&o1shadow_reg(SIZE_OF_HW-1 downto 0)&ZEROS&ZEROS&ZEROS&ZEROS; + when "01" => + wr_mask_x_reg <= ONES&ONES&ONES&ONES&ZEROS&ZEROS&ONES&ONES; + data_out_reg <= ZEROS&ZEROS&ZEROS&ZEROS&o1shadow_reg(SIZE_OF_HW-1 downto 0)&ZEROS&ZEROS; + when others => + wr_mask_x_reg <= ONES&ONES&ONES&ONES&ONES&ONES&ZEROS&ZEROS; + data_out_reg <= ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&o1shadow_reg(SIZE_OF_HW-1 downto 0); + end case; + end if; + mem_en_x_reg(0) <= '0'; + wr_en_x_reg(0) <= '0'; + + when OPC_ST8 => + status_addr_reg(0)(5 downto 3) <= NOT_LOAD; + -- endianes dependent code + -- DEFAULT ENDIANESS + -- little endian + addr_reg <= t1data(addrw-1 downto 3); + if o1load = '1' then + case t1data_lower_3 is + -- endianes dependent code + when "111" => + wr_mask_x_reg <= ZEROS&ONES&ONES&ONES&ONES&ONES&ONES&ONES; + data_out_reg <= o1data(SIZE_OF_BYTE-1 downto 0)&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS; + when "110" => + wr_mask_x_reg <= ONES&ZEROS&ONES&ONES&ONES&ONES&ONES&ONES; + data_out_reg <= ZEROS&o1data(SIZE_OF_BYTE-1 downto 0)&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS; + when "101" => + wr_mask_x_reg <= ONES&ONES&ZEROS&ONES&ONES&ONES&ONES&ONES; + data_out_reg <= ZEROS&ZEROS&o1data(SIZE_OF_BYTE-1 downto 0)&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS; + when "100" => + wr_mask_x_reg <= 
ONES&ONES&ONES&ZEROS&ONES&ONES&ONES&ONES; + data_out_reg <= ZEROS&ZEROS&ZEROS&o1data(SIZE_OF_BYTE-1 downto 0)&ZEROS&ZEROS&ZEROS&ZEROS; + when "011" => + wr_mask_x_reg <= ONES&ONES&ONES&ONES&ZEROS&ONES&ONES&ONES; + data_out_reg <= ZEROS&ZEROS&ZEROS&ZEROS&o1data(SIZE_OF_BYTE-1 downto 0)&ZEROS&ZEROS&ZEROS; + when "010" => + wr_mask_x_reg <= ONES&ONES&ONES&ONES&ONES&ZEROS&ONES&ONES; + data_out_reg <= ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&o1data(SIZE_OF_BYTE-1 downto 0)&ZEROS&ZEROS; + when "001" => + wr_mask_x_reg <= ONES&ONES&ONES&ONES&ONES&ONES&ZEROS&ONES; + data_out_reg <= ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&o1data(SIZE_OF_BYTE-1 downto 0)&ZEROS; + when others => + wr_mask_x_reg <= ONES&ONES&ONES&ONES&ONES&ONES&ONES&ZEROS; + data_out_reg <= ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&o1data(SIZE_OF_BYTE-1 downto 0); + end case; + else + case t1data_lower_3 is + -- endianes dependent code + when "111" => + wr_mask_x_reg <= ZEROS&ONES&ONES&ONES&ONES&ONES&ONES&ONES; + data_out_reg <= o1shadow_reg(SIZE_OF_BYTE-1 downto 0)&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS; + when "110" => + wr_mask_x_reg <= ONES&ZEROS&ONES&ONES&ONES&ONES&ONES&ONES; + data_out_reg <= ZEROS&o1shadow_reg(SIZE_OF_BYTE-1 downto 0)&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS; + when "101" => + wr_mask_x_reg <= ONES&ONES&ZEROS&ONES&ONES&ONES&ONES&ONES; + data_out_reg <= ZEROS&ZEROS&o1shadow_reg(SIZE_OF_BYTE-1 downto 0)&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS; + when "100" => + wr_mask_x_reg <= ONES&ONES&ONES&ZEROS&ONES&ONES&ONES&ONES; + data_out_reg <= ZEROS&ZEROS&ZEROS&o1shadow_reg(SIZE_OF_BYTE-1 downto 0)&ZEROS&ZEROS&ZEROS&ZEROS; + when "011" => + wr_mask_x_reg <= ONES&ONES&ONES&ONES&ZEROS&ONES&ONES&ONES; + data_out_reg <= ZEROS&ZEROS&ZEROS&ZEROS&o1shadow_reg(SIZE_OF_BYTE-1 downto 0)&ZEROS&ZEROS&ZEROS; + when "010" => + wr_mask_x_reg <= ONES&ONES&ONES&ONES&ONES&ZEROS&ONES&ONES; + data_out_reg <= ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&o1shadow_reg(SIZE_OF_BYTE-1 downto 0)&ZEROS&ZEROS; + when "001" => + wr_mask_x_reg <= 
ONES&ONES&ONES&ONES&ONES&ONES&ZEROS&ONES; + data_out_reg <= ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&o1shadow_reg(SIZE_OF_BYTE-1 downto 0)&ZEROS; + when others => + wr_mask_x_reg <= ONES&ONES&ONES&ONES&ONES&ONES&ONES&ZEROS; + data_out_reg <= ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&ZEROS&o1shadow_reg(SIZE_OF_BYTE-1 downto 0); + end case; + end if; + mem_en_x_reg(0) <= '0'; + wr_en_x_reg(0) <= '0'; + when others => + null; + end case; + else + status_addr_reg(0)(5 downto 3) <= NOT_LOAD; + wr_en_x_reg(0) <= '1'; + mem_en_x_reg(0) <= '1'; + end if; + + if o1load = '1' then + o1shadow_reg <= o1data; + end if; + + status_addr_reg(1) <= status_addr_reg(0); + + if status_addr_reg(1)(5 downto 3) = LD64 then + r1_reg <= data_in; + elsif status_addr_reg(1)(5 downto 3) = LD32 then + -- endianes dependent code + -- select either upper or lower part of the word + if status_addr_reg(1)(1) = '1' then + r1_reg <= SXT(data_in(dataw-1 downto dataw/2), r1_reg'length); + else + r1_reg <= SXT(data_in(dataw/2-1 downto 0), r1_reg'length); + end if; + + elsif status_addr_reg(1)(5 downto 3) = LD16 then + case status_addr_reg(1)(2 downto 1) is + -- endianes dependent code + when "11" => + r1_reg <= SXT(data_in(dataw-1 downto dataw-SIZE_OF_HW), r1_reg'length); + when "10" => + r1_reg <= SXT(data_in(dataw-SIZE_OF_HW-1 downto dataw-2*SIZE_OF_HW), r1_reg'length); + when "01" => + r1_reg <= SXT(data_in(dataw-2*SIZE_OF_HW-1 downto dataw-3*SIZE_OF_HW), r1_reg'length); + when others => + r1_reg <= SXT(data_in(dataw-3*SIZE_OF_HW-1 downto dataw-4*SIZE_OF_HW), r1_reg'length); + end case; + + elsif status_addr_reg(1)(5 downto 3) = LD8 then + case status_addr_reg(1)(2 downto 0) is + -- endianes dependent code + when "111" => + r1_reg <= SXT(data_in(dataw-1 downto dataw-SIZE_OF_BYTE), r1_reg'length); + when "110" => + r1_reg <= SXT(data_in(dataw-SIZE_OF_BYTE-1 downto dataw-2*SIZE_OF_BYTE), r1_reg'length); + when "101" => + r1_reg <= SXT(data_in(dataw-2*SIZE_OF_BYTE-1 downto dataw-3*SIZE_OF_BYTE), r1_reg'length); + 
when "100" => + r1_reg <= SXT(data_in(dataw-3*SIZE_OF_BYTE-1 downto dataw-4*SIZE_OF_BYTE), r1_reg'length); + when "011" => + r1_reg <= SXT(data_in(dataw-4*SIZE_OF_BYTE-1 downto dataw-5*SIZE_OF_BYTE), r1_reg'length); + when "010" => + r1_reg <= SXT(data_in(dataw-5*SIZE_OF_BYTE-1 downto dataw-6*SIZE_OF_BYTE), r1_reg'length); + when "001" => + r1_reg <= SXT(data_in(dataw-6*SIZE_OF_BYTE-1 downto dataw-7*SIZE_OF_BYTE), r1_reg'length); + when others => + r1_reg <= SXT(data_in(dataw-7*SIZE_OF_BYTE-1 downto dataw-8*SIZE_OF_BYTE), r1_reg'length); + end case; + + elsif status_addr_reg(1)(5 downto 3) = LDU32 then + -- endianes dependent code + -- select either upper or lower part of the word + if status_addr_reg(1)(1) = '1' then + r1_reg <= EXT(data_in(dataw-1 downto dataw/2), r1_reg'length); + else + r1_reg <= EXT(data_in(dataw/2-1 downto 0), r1_reg'length); + end if; + + elsif status_addr_reg(1)(5 downto 3) = LDU16 then + case status_addr_reg(1)(2 downto 1) is + -- endianes dependent code + when "11" => + r1_reg <= EXT(data_in(dataw-1 downto dataw-SIZE_OF_HW), r1_reg'length); + when "10" => + r1_reg <= EXT(data_in(dataw-SIZE_OF_HW-1 downto dataw-2*SIZE_OF_HW), r1_reg'length); + when "01" => + r1_reg <= EXT(data_in(dataw-2*SIZE_OF_HW-1 downto dataw-3*SIZE_OF_HW), r1_reg'length); + when others => + r1_reg <= EXT(data_in(dataw-3*SIZE_OF_HW-1 downto dataw-4*SIZE_OF_HW), r1_reg'length); + end case; + + elsif status_addr_reg(1)(5 downto 3) = LDU8 then + case status_addr_reg(1)(2 downto 0) is + -- endianes dependent code + when "111" => + r1_reg <= EXT(data_in(dataw-1 downto dataw-SIZE_OF_BYTE), r1_reg'length); + when "110" => + r1_reg <= EXT(data_in(dataw-SIZE_OF_BYTE-1 downto dataw-2*SIZE_OF_BYTE), r1_reg'length); + when "101" => + r1_reg <= EXT(data_in(dataw-2*SIZE_OF_BYTE-1 downto dataw-3*SIZE_OF_BYTE), r1_reg'length); + when "100" => + r1_reg <= EXT(data_in(dataw-3*SIZE_OF_BYTE-1 downto dataw-4*SIZE_OF_BYTE), r1_reg'length); + when "011" => + r1_reg <= 
EXT(data_in(dataw-4*SIZE_OF_BYTE-1 downto dataw-5*SIZE_OF_BYTE), r1_reg'length); + when "010" => + r1_reg <= EXT(data_in(dataw-5*SIZE_OF_BYTE-1 downto dataw-6*SIZE_OF_BYTE), r1_reg'length); + when "001" => + r1_reg <= EXT(data_in(dataw-6*SIZE_OF_BYTE-1 downto dataw-7*SIZE_OF_BYTE), r1_reg'length); + when others => + r1_reg <= EXT(data_in(dataw-7*SIZE_OF_BYTE-1 downto dataw-8*SIZE_OF_BYTE), r1_reg'length); + end case; + + end if; + + end if; + end if; + + end process seq; + + mem_en_x(0) <= mem_en_x_reg(0) or glock; + wr_en_x <= wr_en_x_reg; + wr_mask_x <= wr_mask_x_reg; + data_out <= data_out_reg; + addr <= addr_reg; + r1data <= r1_reg; + +end rtl; + + + diff --git a/tce/hdb/vhdl/MUL64_MAC64.vhdl b/tce/hdb/vhdl/MUL64_MAC64.vhdl new file mode 100644 index 0000000000..fd3212d33e --- /dev/null +++ b/tce/hdb/vhdl/MUL64_MAC64.vhdl @@ -0,0 +1,166 @@ +-- Copyright (c) 2002-2009 Tampere University of Technology. +-- +-- This file is part of TTA-Based Codesign Environment (TCE). +-- +-- Permission is hereby granted, free of charge, to any person obtaining a +-- copy of this software and associated documentation files (the "Software"), +-- to deal in the Software without restriction, including without limitation +-- the rights to use, copy, modify, merge, publish, distribute, sublicense, +-- and/or sell copies of the Software, and to permit persons to whom the +-- Software is furnished to do so, subject to the following conditions: +-- +-- The above copyright notice and this permission notice shall be included in +-- all copies or substantial portions of the Software. +-- +-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+-- FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+-- DEALINGS IN THE SOFTWARE.
+-------------------------------------------------------------------------------
+
+-- 64-BIT Multiplication and MAC Operation FU for TTA64 Project.
+-- Designer: Latif AKCAY
+-- University: Bayburt University, Istanbul Technical University, TURKEY.
+
+library IEEE;
+use IEEE.Std_Logic_1164.all;
+
+package opcodes_mac64_mul64 is  -- one-bit opcode encodings for the two operations of this function unit
+
+  constant MUL64_OPC : std_logic_vector(0 downto 0) := "1";  -- multiply: in1t * in2
+  constant MAC64_OPC : std_logic_vector(0 downto 0) := "0";  -- multiply-accumulate: in1t * in2 + in3
+
+end opcodes_mac64_mul64;
+
+library IEEE;
+use IEEE.Std_Logic_1164.all;
+use IEEE.Std_Logic_arith.all;
+use ieee.std_logic_misc.all;
+use work.util.all;
+use work.opcodes_mac64_mul64.all;
+
+entity fu_mac64_mul64_always_2 is  -- MUL64/MAC64 function unit: three operand inputs, one result output
+
+  generic (  -- NOTE(review): neither generic is referenced in this file; every width below is hard-coded
+    busw : integer := 64;
+    dataw : integer := 64
+    );
+
+  port (
+    clk : in std_logic;  -- registers update on the rising edge
+    rstx : in std_logic;  -- asynchronous reset, active low
+    glock : in std_logic;  -- while '1', none of the registers in this unit are updated
+    operation_in : in std_logic_vector(0 downto 0);  -- opcode (MUL64_OPC / MAC64_OPC), used when load_in1t_in = '1'
+    data_in1t_in : in std_logic_vector(63 downto 0);  -- operand 1
+    load_in1t_in : in std_logic;  -- operand 1 load strobe; a result is computed in every cycle where it is '1'
+    data_in2_in : in std_logic_vector(63 downto 0);  -- operand 2
+    load_in2_in : in std_logic;  -- operand 2 load strobe
+    data_in3_in : in std_logic_vector(63 downto 0);  -- operand 3 (addend of MAC64)
+    load_in3_in : in std_logic;  -- operand 3 load strobe
+    data_out1_out : out std_logic_vector(63 downto 0)  -- result
+    );
+end entity fu_mac64_mul64_always_2;
+
+architecture rtl of fu_mac64_mul64_always_2 is
+
+  signal data_in1t : std_logic_vector(63 downto 0);  -- operand 1 as seen by the datapath
+  signal data_in2 : std_logic_vector(63 downto 0);  -- operand 2: port value or shadow register
+  signal data_in3 : std_logic_vector(63 downto 0);  -- operand 3: port value or shadow register
+
+  signal shadow_in2_r : std_logic_vector(63 downto 0);  -- last value loaded through the operand 2 port
+  signal shadow_in3_r : std_logic_vector(63 downto 0);  -- shadow register for operand 3
+  signal operation_1_r : std_logic_vector(0 downto 0);  -- registered opcode; written but not read in this architecture
+  signal optrig_1_r : std_logic;  -- registered trigger; written but not read in this architecture
+  signal data_out1_r : std_logic_vector(127 downto 0);  -- first result stage, wide enough for a full 64x64 product
+  signal data_out1_reg : std_logic_vector(127
downto 0);
+  signal data_out1_1_valid_r : std_logic;  -- '1' in the cycle after a trigger: data_out1_r holds a new result
+
+begin
+
+  data_in1t <= data_in1t_in;  -- operand 1 is taken straight from its port
+
+  shadow_in2_in3_sp : process(clk, rstx)  -- shadow registers: remember operands 2 and 3 written ahead of the trigger
+  begin
+    if rstx = '0' then  -- asynchronous, active-low reset
+      shadow_in2_r <= (others => '0');
+      shadow_in3_r <= (others => '0');
+    elsif clk = '1' and clk'event then
+      if ((glock = '0') and (load_in2_in = '1')) then
+        shadow_in2_r <= data_in2_in;
+      end if;
+      if ((glock = '0') and (load_in3_in = '1')) then
+        shadow_in3_r <= data_in3_in;  -- operand 3 must be latched from its own port, not from data_in2_in
+      end if;
+    end if;
+  end process shadow_in2_in3_sp;
+
+  shadow_in2_in3_cp : process(shadow_in2_r, shadow_in3_r, data_in2_in, data_in3_in, load_in1t_in, load_in2_in, load_in3_in)  -- operand bypass
+  begin
+    if ((load_in1t_in = '1') and (load_in2_in = '1')) then  -- operand 2 written in the trigger cycle: use the port value
+      data_in2 <= data_in2_in;
+    else
+      data_in2 <= shadow_in2_r;
+    end if;
+    if ((load_in1t_in = '1') and (load_in3_in = '1')) then  -- same bypass rule for operand 3
+      data_in3 <= data_in3_in;
+    else
+      data_in3 <= shadow_in3_r;
+    end if;
+  end process shadow_in2_in3_cp;
+
+  input_pipeline_sp : process(clk, rstx)  -- registers the trigger strobe and the opcode that came with it
+  begin
+    if rstx = '0' then
+      operation_1_r <= (others => '0');
+      optrig_1_r <= '0';
+    elsif clk = '1' and clk'event then
+      if (glock = '0') then
+        optrig_1_r <= load_in1t_in;
+        if (load_in1t_in = '1') then
+          operation_1_r <= operation_in;
+        end if;
+      end if;
+    end if;
+  end process input_pipeline_sp;
+
+  output_pipeline_sp : process(clk, rstx)  -- two result stages: data_out1_r, then data_out1_reg
+  begin
+    if rstx = '0' then
+      data_out1_1_valid_r <= '0';
+      data_out1_r <= (others => '0');
+      data_out1_reg <= (others => '0');
+    elsif clk = '1' and clk'event then
+      if (glock = '0') then
+        data_out1_1_valid_r <= load_in1t_in;  -- marks data_out1_r as new for the next cycle
+        if load_in1t_in = '1' then  -- stage 1: compute on trigger, using the opcode present on the port
+          case operation_in is
+            when MUL64_OPC =>
+              data_out1_r <= conv_std_logic_vector(signed(data_in1t) * signed(data_in2), data_out1_r'length );
+            when MAC64_OPC =>
+              data_out1_r <= conv_std_logic_vector((unsigned(data_in1t) * unsigned(data_in2) + unsigned(data_in3)), data_out1_r'length );
+            when others =>
+          end case;
+        end if;
+        if (data_out1_1_valid_r = '1') then  -- stage 2: move the result computed in the previous cycle
+          data_out1_reg <= data_out1_r;
+        end if;
+      end if;
+    end if;
+
end process output_pipeline_sp; + + data_out1_out <= "00000000000000000000000000000000" & data_out1_reg(31 downto 0); -- result has to be compatible with the current version of the operation! + +end architecture rtl; + + + + + + + + + + diff --git a/tce/icdecoder_plugins/DefaultICDecoderPlugin.cc b/tce/icdecoder_plugins/DefaultICDecoderPlugin.cc index 51893b65e6..02b68fc370 100644 --- a/tce/icdecoder_plugins/DefaultICDecoderPlugin.cc +++ b/tce/icdecoder_plugins/DefaultICDecoderPlugin.cc @@ -30,6 +30,10 @@ * @author Vinogradov Viacheslav(added Verilog generating) 2012 * @note rating: red */ + + /* + Description: Default code was edited by Latif AKÇAY to make it compatible with 64-bit TTA processor designs. + */ #include #include @@ -985,7 +989,7 @@ class DefaultICDecoderGenerator : public ICDecoderGeneratorPlugin { } //Figure out some constants - int dbgDataWidth=32; + int dbgDataWidth=64; Machine::BusNavigator busNav = ttamachine_.busNavigator(); int bustrace_width = dbgDataWidth*busNav.count(); @@ -998,16 +1002,16 @@ class DefaultICDecoderGenerator : public ICDecoderGeneratorPlugin { "db_pc", "IMEMADDRWIDTH", ProGe::BIT_VECTOR, HDB::OUT, toplevelBlock); NetlistPort* ttaBustracePort = new NetlistPort( - "db_bustraces", "32*BUSCOUNT", + "db_bustraces", "64*BUSCOUNT", bustrace_width, ProGe::BIT_VECTOR, HDB::OUT, toplevelBlock); NetlistPort* ttaInstrPort = new NetlistPort( "db_instr", "IMEMDATAWIDTH", ProGe::BIT_VECTOR, HDB::OUT, toplevelBlock); NetlistPort* ttaLockcountPort = new NetlistPort( - "db_lockcnt", "32", dbgDataWidth, + "db_lockcnt", "64", dbgDataWidth, ProGe::BIT_VECTOR, HDB::OUT, toplevelBlock); NetlistPort* ttaCyclecountPort = new NetlistPort( - "db_cyclecnt", "32", dbgDataWidth, + "db_cyclecnt", "64", dbgDataWidth, ProGe::BIT_VECTOR, HDB::OUT, toplevelBlock); NetlistPort* ttaResetPort = new NetlistPort( "db_tta_nreset", "1", ProGe::BIT, HDB::IN, toplevelBlock); @@ -1049,12 +1053,12 @@ class DefaultICDecoderGenerator : public ICDecoderGeneratorPlugin {
*fetchBlock); toplevelBlock.netlist().connectPorts(*ifetchPCNextPort, *dbPCNextPort); NetlistPort* ifetchCyclecountPort = new NetlistPort( - "db_cyclecnt", "32", 32, ProGe::BIT_VECTOR, HDB::OUT, + "db_cyclecnt", "64", 64, ProGe::BIT_VECTOR, HDB::OUT, *fetchBlock); toplevelBlock.netlist().connectPorts( *ifetchCyclecountPort, *ttaCyclecountPort); NetlistPort* ifetchLockcountPort = new NetlistPort( - "db_lockcnt", "32", 32, ProGe::BIT_VECTOR, HDB::OUT, + "db_lockcnt", "64", 64, ProGe::BIT_VECTOR, HDB::OUT, *fetchBlock); toplevelBlock.netlist().connectPorts( *ifetchLockcountPort, *ttaLockcountPort);