diff --git a/dv/riscv_compliance/ibex_riscv_compliance.core b/dv/riscv_compliance/ibex_riscv_compliance.core index edc7fcbffa..87012c1fc1 100644 --- a/dv/riscv_compliance/ibex_riscv_compliance.core +++ b/dv/riscv_compliance/ibex_riscv_compliance.core @@ -42,6 +42,12 @@ parameters: paramtype: vlogdefine description: "Bitmanip implementation parameter enum. See the ibex_pkg::rv32b_e enum in ibex_pkg.sv for permitted values." + RV32K: + datatype: str + default: ibex_pkg::RV32ZkNone + paramtype: vlogdefine + description: "Cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values." + RegFile: datatype: str default: ibex_pkg::RegFileFF @@ -112,6 +118,7 @@ targets: - RV32E - RV32M - RV32B + - RV32K - RegFile - ICache - ICacheECC diff --git a/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv b/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv index 5441a13561..bf87c078b6 100644 --- a/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv +++ b/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv @@ -10,6 +10,10 @@ * simulators (if the top-level clk and rst ports are replaced with a generated * clock). 
*/ + `ifndef RV32K + `define RV32K ibex_pkg::RV32ZkNone + `endif + module ibex_riscv_compliance ( input IO_CLK, input IO_RST_N @@ -21,6 +25,8 @@ module ibex_riscv_compliance ( parameter bit RV32E = 1'b0; parameter ibex_pkg::rv32m_e RV32M = ibex_pkg::RV32MFast; parameter ibex_pkg::rv32b_e RV32B = ibex_pkg::RV32BNone; + parameter ibex_pkg::rv32zk_e RV32K = `RV32K; + parameter ibex_pkg::regfile_e RegFile = ibex_pkg::RegFileFF; parameter bit BranchTargetALU = 1'b0; parameter bit WritebackStage = 1'b0; @@ -120,6 +126,7 @@ module ibex_riscv_compliance ( .RV32E (RV32E ), .RV32M (RV32M ), .RV32B (RV32B ), + .RV32K (RV32K ), .RegFile (RegFile ), .BranchTargetALU (BranchTargetALU ), .WritebackStage (WritebackStage ), diff --git a/dv/verilator/simple_system_cosim/ibex_simple_system_cosim.core b/dv/verilator/simple_system_cosim/ibex_simple_system_cosim.core index bc9acfcaa2..29710a2a2a 100644 --- a/dv/verilator/simple_system_cosim/ibex_simple_system_cosim.core +++ b/dv/verilator/simple_system_cosim/ibex_simple_system_cosim.core @@ -35,6 +35,12 @@ parameters: paramtype: vlogdefine description: "Bitmanip implementation parameter enum. See the ibex_pkg::rv32b_e enum in ibex_pkg.sv for permitted values." + RV32K: + datatype: str + default: ibex_pkg::RV32ZkNone + paramtype: vlogdefine + description: "Cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values." + RegFile: datatype: str default: ibex_pkg::RegFileFF @@ -109,6 +115,7 @@ targets: - RV32E - RV32M - RV32B + - RV32K - RegFile - ICache - ICacheECC diff --git a/examples/simple_system/ibex_simple_system.core b/examples/simple_system/ibex_simple_system.core index 1813bab074..5b633a2775 100644 --- a/examples/simple_system/ibex_simple_system.core +++ b/examples/simple_system/ibex_simple_system.core @@ -31,6 +31,12 @@ parameters: paramtype: vlogdefine description: "Bitmanip implementation parameter enum. 
See the ibex_pkg::rv32b_e enum in ibex_pkg.sv for permitted values." + RV32K: + datatype: str + default: ibex_pkg::RV32ZkNone + paramtype: vlogdefine + description: "Cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values." + RegFile: datatype: str default: ibex_pkg::RegFileFF @@ -105,6 +111,7 @@ targets: - RV32E - RV32M - RV32B + - RV32K - RegFile - ICache - ICacheECC diff --git a/examples/simple_system/rtl/ibex_simple_system.sv b/examples/simple_system/rtl/ibex_simple_system.sv index f54a8983f0..4f17ba01ae 100644 --- a/examples/simple_system/rtl/ibex_simple_system.sv +++ b/examples/simple_system/rtl/ibex_simple_system.sv @@ -14,6 +14,10 @@ `define RV32B ibex_pkg::RV32BNone `endif +`ifndef RV32K + `define RV32K ibex_pkg::RV32ZkNone +`endif + `ifndef RegFile `define RegFile ibex_pkg::RegFileFF `endif @@ -42,6 +46,7 @@ module ibex_simple_system ( parameter bit RV32E = 1'b0; parameter ibex_pkg::rv32m_e RV32M = `RV32M; parameter ibex_pkg::rv32b_e RV32B = `RV32B; + parameter ibex_pkg::rv32zk_e RV32K = `RV32K; parameter ibex_pkg::regfile_e RegFile = `RegFile; parameter bit BranchTargetALU = 1'b0; parameter bit WritebackStage = 1'b0; @@ -170,6 +175,7 @@ module ibex_simple_system ( .RV32E ( RV32E ), .RV32M ( RV32M ), .RV32B ( RV32B ), + .RV32K ( RV32K ), .RegFile ( RegFile ), .BranchTargetALU ( BranchTargetALU ), .ICache ( ICache ), diff --git a/ibex_configs.yaml b/ibex_configs.yaml index a9d3172a56..59a7cfe6ef 100644 --- a/ibex_configs.yaml +++ b/ibex_configs.yaml @@ -11,6 +11,7 @@ small: RV32E : 0 RV32M : "ibex_pkg::RV32MFast" RV32B : "ibex_pkg::RV32BNone" + RV32K : "ibex_pkg::RV32ZkNone" RegFile : "ibex_pkg::RegFileFF" BranchTargetALU : 0 WritebackStage : 0 @@ -27,6 +28,7 @@ opentitan: RV32E : 0 RV32M : "ibex_pkg::RV32MSingleCycle" RV32B : "ibex_pkg::RV32BOTEarlGrey" + RV32K : "ibex_pkg::RV32ZkNone" RegFile : "ibex_pkg::RegFileFF" BranchTargetALU : 1 WritebackStage : 1 @@ -49,6 +51,7 @@ 
experimental-maxperf: RV32E : 0 RV32M : "ibex_pkg::RV32MSingleCycle" RV32B : "ibex_pkg::RV32BNone" + RV32K : "ibex_pkg::RV32ZkNone" RegFile : "ibex_pkg::RegFileFF" BranchTargetALU : 1 WritebackStage : 1 @@ -65,6 +68,7 @@ experimental-maxperf-pmp: RV32E : 0 RV32M : "ibex_pkg::RV32MSingleCycle" RV32B : "ibex_pkg::RV32BNone" + RV32K : "ibex_pkg::RV32ZkNone" RegFile : "ibex_pkg::RegFileFF" BranchTargetALU : 1 WritebackStage : 1 @@ -81,6 +85,7 @@ experimental-maxperf-pmp-bmbalanced: RV32E : 0 RV32M : "ibex_pkg::RV32MSingleCycle" RV32B : "ibex_pkg::RV32BBalanced" + RV32K : "ibex_pkg::RV32ZkNone" RegFile : "ibex_pkg::RegFileFF" BranchTargetALU : 1 WritebackStage : 1 @@ -97,6 +102,7 @@ experimental-maxperf-pmp-bmfull: RV32E : 0 RV32M : "ibex_pkg::RV32MSingleCycle" RV32B : "ibex_pkg::RV32BFull" + RV32K : "ibex_pkg::RV32ZkNone" RegFile : "ibex_pkg::RegFileFF" BranchTargetALU : 1 WritebackStage : 1 @@ -108,11 +114,47 @@ experimental-maxperf-pmp-bmfull: PMPNumRegions : 16 SecureIbex : 0 +# experimental-maxperf-pmp config above with zkn extension +experimental-maxperf-pmp-zkn: + RV32E : 0 + RV32M : "ibex_pkg::RV32MSingleCycle" + RV32B : "ibex_pkg::RV32BNone" + RV32K : "ibex_pkg::RV32Zkn" + RegFile : "ibex_pkg::RegFileFF" + BranchTargetALU : 1 + WritebackStage : 1 + ICache : 0 + ICacheECC : 0 + BranchPredictor : 0 + PMPEnable : 1 + PMPGranularity : 0 + PMPNumRegions : 16 + SecureIbex : 0 + +# experimental-maxperf-pmp config above with zks extension +experimental-maxperf-pmp-zks: + RV32E : 0 + RV32M : "ibex_pkg::RV32MSingleCycle" + RV32B : "ibex_pkg::RV32BNone" + RV32K : "ibex_pkg::RV32Zks" + RegFile : "ibex_pkg::RegFileFF" + BranchTargetALU : 1 + WritebackStage : 1 + ICache : 0 + ICacheECC : 0 + BranchPredictor : 0 + PMPEnable : 1 + PMPGranularity : 0 + PMPNumRegions : 16 + SecureIbex : 0 + + # experimental-maxperf-pmp-bmfull config above with icache enabled experimental-maxperf-pmp-bmfull-icache: RV32E : 0 RV32M : "ibex_pkg::RV32MSingleCycle" RV32B : "ibex_pkg::RV32BFull" + 
RV32K : "ibex_pkg::RV32ZkNone" RegFile : "ibex_pkg::RegFileFF" BranchTargetALU : 1 WritebackStage : 1 @@ -132,6 +174,7 @@ experimental-branch-predictor: RV32E : 0 RV32M : "ibex_pkg::RV32MSingleCycle" RV32B : "ibex_pkg::RV32BNone" + RV32K : "ibex_pkg::RV32ZkNone" RegFile : "ibex_pkg::RegFileFF" BranchTargetALU : 1 WritebackStage : 1 @@ -143,3 +186,4 @@ experimental-branch-predictor: PMPNumRegions : 4 SecureIbex : 0 + diff --git a/ibex_core.core b/ibex_core.core index a77ba5999a..c84f68368a 100644 --- a/ibex_core.core +++ b/ibex_core.core @@ -34,6 +34,10 @@ filesets: - rtl/ibex_pmp.sv - rtl/ibex_wb_stage.sv - rtl/ibex_dummy_instr.sv + - rtl/ibex_zk.sv + - rtl/ibex_aes_sbox.sv + - rtl/ibex_sm4_sbox.sv + - rtl/ibex_poly16_mul.sv - rtl/ibex_core.sv file_type: systemVerilogSource @@ -81,6 +85,12 @@ parameters: paramtype: vlogdefine description: "Bitmanip implementation parameter enum. See the ibex_pkg::rv32b_e enum in ibex_pkg.sv for permitted values." + RV32K: + datatype: str + default: ibex_pkg::RV32ZkNone + paramtype: vlogdefine + description: "Cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values." + RegFile: datatype: str default: ibex_pkg::RegFileFF diff --git a/ibex_top_tracing.core b/ibex_top_tracing.core index 550e5a18b6..92c751226f 100644 --- a/ibex_top_tracing.core +++ b/ibex_top_tracing.core @@ -41,6 +41,12 @@ parameters: paramtype: vlogdefine description: "Bitmanip implementation parameter enum. See the ibex_pkg::rv32b_e enum in ibex_pkg.sv for permitted values." + RV32K: + datatype: str + default: ibex_pkg::RV32ZkNone + paramtype: vlogdefine + description: "Cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values." 
+ RegFile: datatype: str default: ibex_pkg::RegFileFF @@ -117,6 +123,7 @@ targets: - RV32E - RV32M - RV32B + - RV32K - RegFile - ICache - ICacheECC diff --git a/rtl/ibex_aes_sbox.sv b/rtl/ibex_aes_sbox.sv new file mode 100644 index 0000000000..ebd18a4489 --- /dev/null +++ b/rtl/ibex_aes_sbox.sv @@ -0,0 +1,288 @@ +// Copyright lowRISC contributors. +// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +/** + * AES Sbox unit + * This modified version is based on the optimised AES structure proposed by Boyar & Peralta [BoPe12]. + * AES S-Boxes are broken into a nonlinear middle layer and two linear top and bottom layers. + * The middle layer performs the non-linear field inversion, which is shared by the forward and inverse S-boxes. + * + * [BoPe12] Boyar J., Peralta R. "A Small Depth-16 Circuit for the AES + * S-Box." Proc.SEC 2012. IFIP AICT 376. Springer, pp. 
287-298 (2012) + * DOI: https://doi.org/10.1007/978-3-642-30436-1_24 + * Preprint: https://eprint.iacr.org/2011/332.pdf + */ +module ibex_aes_sbox ( +input logic fw, +input logic [7:0] in, +output logic [7:0] fx + +); + +// aes_sbox_top +function automatic logic [20:0] aes_sbox_top(logic [7:0] x); + logic y20; + logic y19, y18, y17, y16, y15, y14, y13, y12, y11, y10; + logic y9, y8, y7, y6, y5, y4, y3, y2, y1, y0 ; + logic t5, t4, t3, t2, t1, t0 ; + + y0 = x[ 0] ; + y1 = x[ 7] ^ x[ 4]; + y2 = x[ 7] ^ x[ 2]; + y3 = x[ 7] ^ x[ 1]; + y4 = x[ 4] ^ x[ 2]; + t0 = x[ 3] ^ x[ 1]; + y5 = y1 ^ t0 ; + t1 = x[ 6] ^ x[ 5]; + y6 = x[ 0] ^ y5 ; + y7 = x[ 0] ^ t1 ; + y8 = y5 ^ t1 ; + t2 = x[ 6] ^ x[ 2]; + t3 = x[ 5] ^ x[ 2]; + y9 = y3 ^ y4 ; + y10 = y5 ^ t2 ; + y11 = t0 ^ t2 ; + y12 = t0 ^ t3 ; + y13 = y7 ^ y12 ; + t4 = x[ 4] ^ x[ 0]; + y14 = t1 ^ t4 ; + y15 = y1 ^ y14 ; + t5 = x[ 1] ^ x[ 0]; + y16 = t1 ^ t5 ; + y17 = y2 ^ y16 ; + y18 = y2 ^ y8 ; + y19 = y15 ^ y13 ; + y20 = y1 ^ t3 ; + + return {y20, y19, y18, y17, y16, y15, y14, y13, y12, y11, + y10, y9, y8, y7, y6, y5, y4, y3, y2, y1, y0}; +endfunction + +// aes_sbox_out +function automatic logic [7:0] aes_sbox_out(logic [17:0] x); + logic [7:0] y; + logic t29, t28, t27, t26, t25, t24, t23, t22, t21, t20; + logic t19, t18, t17, t16, t15, t14, t13, t12, t11, t10; + logic t9, t8, t7, t6, t5, t4, t3, t2, t1, t0 ; + t0 = x[11] ^ x[12]; + t1 = x[0] ^ x[6]; + t2 = x[14] ^ x[16]; + t3 = x[15] ^ x[5]; + t4 = x[4] ^ x[8]; + t5 = x[17] ^ x[11]; + t6 = x[12] ^ t5; + t7 = x[14] ^ t3; + t8 = x[1] ^ x[9]; + t9 = x[2] ^ x[3]; + t10 = x[3] ^ t4; + t11 = x[10] ^ t2; + t12 = x[16] ^ x[1]; + t13 = x[0] ^ t0; + t14 = x[2] ^ x[11]; + t15 = x[5] ^ t1; + t16 = x[6] ^ t0; + t17 = x[7] ^ t1; + t18 = x[8] ^ t8; + t19 = x[13] ^ t4; + t20 = t0 ^ t1; + t21 = t1 ^ t7; + t22 = t3 ^ t12; + t23 = t18 ^ t2; + t24 = t15 ^ t9; + t25 = t6 ^ t10; + t26 = t7 ^ t9; + t27 = t8 ^ t10; + t28 = t11 ^ t14; + t29 = t11 ^ t17; + y[0] = t6 ^~ t23; + y[1] = t13 ^~ t27; + y[2] 
= t25 ^ t29; + y[3] = t20 ^ t22; + y[4] = t6 ^ t21; + y[5] = t19 ^~ t28; + y[6] = t16 ^~ t26; + y[7] = t6 ^ t24; + return y; +endfunction + + +// aes_sbox_inv_mid +function automatic logic [17:0] aes_sbox_inv_mid(logic [20:0] x); + logic [17:0] y; + logic t45, t44, t43, t42, t41, t40; + logic t39, t38, t37, t36, t35, t34, t33, t32, t31, t30; + logic t29, t28, t27, t26, t25, t24, t23, t22, t21, t20; + logic t19, t18, t17, t16, t15, t14, t13, t12, t11, t10; + logic t9, t8, t7, t6, t5, t4, t3, t2, t1, t0 ; + t0 = x[ 3] ^ x[12]; + t1 = x[ 9] & x[ 5]; + t2 = x[17] & x[ 6]; + t3 = x[10] ^ t1 ; + t4 = x[14] & x[ 0]; + t5 = t4 ^ t1 ; + t6 = x[ 3] & x[12]; + t7 = x[16] & x[ 7]; + t8 = t0 ^ t6 ; + t9 = x[15] & x[13]; + t10 = t9 ^ t6 ; + t11 = x[ 1] & x[11]; + t12 = x[ 4] & x[20]; + t13 = t12 ^ t11 ; + t14 = x[ 2] & x[ 8]; + t15 = t14 ^ t11 ; + t16 = t3 ^ t2 ; + t17 = t5 ^ x[18]; + t18 = t8 ^ t7 ; + t19 = t10 ^ t15 ; + t20 = t16 ^ t13 ; + t21 = t17 ^ t15 ; + t22 = t18 ^ t13 ; + t23 = t19 ^ x[19]; + t24 = t22 ^ t23 ; + t25 = t22 & t20 ; + t26 = t21 ^ t25 ; + t27 = t20 ^ t21 ; + t28 = t23 ^ t25 ; + t29 = t28 & t27 ; + t30 = t26 & t24 ; + t31 = t20 & t23 ; + t32 = t27 & t31 ; + t33 = t27 ^ t25 ; + t34 = t21 & t22 ; + t35 = t24 & t34 ; + t36 = t24 ^ t25 ; + t37 = t21 ^ t29 ; + t38 = t32 ^ t33 ; + t39 = t23 ^ t30 ; + t40 = t35 ^ t36 ; + t41 = t38 ^ t40 ; + t42 = t37 ^ t39 ; + t43 = t37 ^ t38 ; + t44 = t39 ^ t40 ; + t45 = t42 ^ t41 ; + + y[ 0] = t38 & x[ 7]; + y[ 1] = t37 & x[13]; + y[ 2] = t42 & x[11]; + y[ 3] = t45 & x[20]; + y[ 4] = t41 & x[ 8]; + y[ 5] = t44 & x[ 9]; + y[ 6] = t40 & x[17]; + y[ 7] = t39 & x[14]; + y[ 8] = t43 & x[ 3]; + y[ 9] = t38 & x[16]; + y[10] = t37 & x[15]; + y[11] = t42 & x[ 1]; + y[12] = t45 & x[ 4]; + y[13] = t41 & x[ 2]; + y[14] = t44 & x[ 5]; + y[15] = t40 & x[ 6]; + y[16] = t39 & x[ 0]; + y[17] = t43 & x[12]; + + return y; +endfunction + +// inverse aes_sbox_top +function automatic logic [20:0] aes_inv_sbox_top(logic [7:0] x); + logic y20; + logic 
y19, y18, y17, y16, y15, y14, y13, y12, y11, y10; + logic y9, y8, y7, y6, y5, y4, y3, y2, y1, y0 ; + logic t4, t3, t2, t1, t0 ; + y17 = x[ 7] ^ x[ 4]; + y16 = x[ 6] ^~ x[ 4]; + y2 = x[ 7] ^~ x[ 6]; + y1 = x[ 4] ^ x[ 3]; + y18 = x[ 3] ^~ x[ 0]; + t0 = x[ 1] ^ x[ 0]; + y6 = x[ 6] ^~ y17 ; + y14 = y16 ^ t0; + y7 = x[ 0] ^~ y1; + y8 = y2 ^ y18; + y9 = y2 ^ t0; + y3 = y1 ^ t0; + y19 = x[ 5] ^~ y1; + t1 = x[ 6] ^ x[ 1]; + y13 = x[ 5] ^~ y14; + y15 = y18 ^ t1; + y4 = x[ 3] ^ y6; + t2 = x[ 5] ^~ x[ 2]; + t3 = x[ 2] ^~ x[ 1]; + t4 = x[ 5] ^~ x[ 3]; + y5 = y16 ^ t2 ; + y12 = t1 ^ t4 ; + y20 = y1 ^ t3 ; + y11 = y8 ^ y20 ; + y10 = y8 ^ t3 ; + y0 = x[ 7] ^ t2 ; + + return {y20, y19, y18, y17, y16, y15, y14, y13, y12, y11, + y10, y9, y8, y7, y6, y5, y4, y3, y2, y1, y0}; +endfunction + +// inverse aes_sbox_out +function automatic logic [7:0] aes_inv_sbox_out(logic [17:0] x); + logic [7:0] y; + logic t29, t28, t27, t26, t25, t24, t23, t22, t20; + logic t19, t18, t17, t16, t15, t14, t13, t12, t11, t10; + logic t9, t8, t7, t6, t5, t4, t3, t2, t1, t0 ; + t0 = x[ 2] ^ x[11]; + t1 = x[ 8] ^ x[ 9]; + t2 = x[ 4] ^ x[12]; + t3 = x[15] ^ x[ 0]; + t4 = x[16] ^ x[ 6]; + t5 = x[14] ^ x[ 1]; + t6 = x[17] ^ x[10]; + t7 = t0 ^ t1 ; + t8 = x[ 0] ^ x[ 3]; + t9 = x[ 5] ^ x[13]; + t10 = x[ 7] ^ t4 ; + t11 = t0 ^ t3 ; + t12 = x[14] ^ x[16]; + t13 = x[17] ^ x[ 1]; + t14 = x[17] ^ x[12]; + t15 = x[ 4] ^ x[ 9]; + t16 = x[ 7] ^ x[11]; + t17 = x[ 8] ^ t2 ; + t18 = x[13] ^ t5 ; + t19 = t2 ^ t3 ; + t20 = t4 ^ t6 ; + t22 = t2 ^ t7 ; + t23 = t7 ^ t8 ; + t24 = t5 ^ t7 ; + t25 = t6 ^ t10; + t26 = t9 ^ t11; + t27 = t10 ^ t18; + t28 = t11 ^ t25; + t29 = t15 ^ t20; + + y[ 0] = t9 ^ t16; + y[ 1] = t14 ^ t23; + y[ 2] = t19 ^ t24; + y[ 3] = t23 ^ t27; + y[ 4] = t12 ^ t22; + y[ 5] = t17 ^ t28; + y[ 6] = t26 ^ t29; + y[ 7] = t13 ^ t22; + return y; +endfunction + +logic [20:0] fwd_top, inv_top, top_box; +assign fwd_top = aes_sbox_top(in); +assign inv_top = aes_inv_sbox_top(in); +assign top_box = (fw)? 
fwd_top : inv_top; + +logic [17:0] mid; +assign mid = aes_sbox_inv_mid(top_box); + +logic [ 7:0] fwd_out, inv_out; +assign fwd_out = aes_sbox_out(mid); +assign inv_out = aes_inv_sbox_out(mid); +assign fx = (fw)? fwd_out : inv_out; + +endmodule + + + diff --git a/rtl/ibex_core.sv b/rtl/ibex_core.sv index 136648dfa7..74ec79c567 100644 --- a/rtl/ibex_core.sv +++ b/rtl/ibex_core.sv @@ -21,6 +21,7 @@ module ibex_core import ibex_pkg::*; #( parameter bit RV32E = 1'b0, parameter rv32m_e RV32M = RV32MFast, parameter rv32b_e RV32B = RV32BNone, + parameter rv32zk_e RV32K = RV32ZkNone, parameter bit BranchTargetALU = 1'b0, parameter bit WritebackStage = 1'b0, parameter bit ICache = 1'b0, @@ -457,6 +458,7 @@ module ibex_core import ibex_pkg::*; #( .RV32E (RV32E), .RV32M (RV32M), .RV32B (RV32B), + .RV32Zk (RV32K), .BranchTargetALU(BranchTargetALU), .DataIndTiming (DataIndTiming), .WritebackStage (WritebackStage), @@ -614,6 +616,7 @@ module ibex_core import ibex_pkg::*; #( ibex_ex_block #( .RV32M (RV32M), .RV32B (RV32B), + .RV32Zk (RV32K), .BranchTargetALU(BranchTargetALU) ) ex_block_i ( .clk_i (clk_i), diff --git a/rtl/ibex_decoder.sv b/rtl/ibex_decoder.sv index 4b01959301..67a278df87 100644 --- a/rtl/ibex_decoder.sv +++ b/rtl/ibex_decoder.sv @@ -14,10 +14,11 @@ `include "prim_assert.sv" module ibex_decoder #( - parameter bit RV32E = 0, - parameter ibex_pkg::rv32m_e RV32M = ibex_pkg::RV32MFast, - parameter ibex_pkg::rv32b_e RV32B = ibex_pkg::RV32BNone, - parameter bit BranchTargetALU = 0 + parameter bit RV32E = 0, + parameter ibex_pkg::rv32m_e RV32M = ibex_pkg::RV32MFast, + parameter ibex_pkg::rv32b_e RV32B = ibex_pkg::RV32BNone, + parameter ibex_pkg::rv32zk_e RV32Zk = ibex_pkg::RV32ZkNone, + parameter bit BranchTargetALU = 0 ) ( input logic clk_i, input logic rst_ni, @@ -368,12 +369,16 @@ module ibex_decoder #( 5'b0_1001, // bclri 5'b0_0101, // bseti 5'b0_1101: illegal_insn = (RV32B != RV32BNone) ? 
1'b0 : 1'b1; // binvi - 5'b0_0001: begin - if (instr[26] == 1'b0) begin // shfl - illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1; + 5'b0_0001: if (instr[26] == 1'b0) begin + if ((RV32B == RV32BOTEarlGrey) || (RV32B == RV32BFull)) begin + illegal_insn = 1'b0; // shfl + end else if (RV32Zk != RV32ZkNone) begin + illegal_insn = (instr[25:20] == 6'b00_1111) ? 1'b0 : 1'b1; //zip end else begin illegal_insn = 1'b1; end + end else begin + illegal_insn = 1'b1; end 5'b0_1100: begin unique case(instr[26:20]) @@ -393,6 +398,17 @@ module ibex_decoder #( default: illegal_insn = 1'b1; endcase end + 5'b0_0010: begin + unique case(instr[26:20]) + 7'b000_0000, // sha256sum0 + 7'b000_0001, // sha256sum1 + 7'b000_0010, // sha256sig0 + 7'b000_0011: illegal_insn = (RV32Zk == RV32Zkn) ? 1'b0 : 1'b1; // sha256sig1 + 7'b000_1000, // sm3p0 + 7'b000_1001: illegal_insn = (RV32Zk == RV32Zks) ? 1'b0 : 1'b1; // sm3p1 + default: illegal_insn = 1'b1; + endcase + end default : illegal_insn = 1'b1; endcase end @@ -408,16 +424,20 @@ module ibex_decoder #( 5'b0_0100: begin // sroi illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1; end - 5'b0_1100, // rori + 5'b0_1100: illegal_insn = ((RV32B != RV32BNone) || + (RV32Zk!= RV32ZkNone)) ? 1'b0 : 1'b1; // rori 5'b0_1001: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // bexti 5'b0_1101: begin if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) begin illegal_insn = 1'b0; // grevi - end else if (RV32B == RV32BBalanced) begin - illegal_insn = (instr[24:20] == 5'b11000) ? 1'b0 : 1'b1; // rev8 end else begin - illegal_insn = 1'b1; + unique case (instr[24:20]) + 5'b11000: illegal_insn = ((RV32B == RV32BBalanced) || + (RV32Zk != RV32ZkNone )) ? 1'b0 : 1'b1; // rev8 + 5'b00111: illegal_insn = ( RV32Zk != RV32ZkNone ) ? 
1'b0 : 1'b1; // brev8 + default: illegal_insn = 1'b1; + endcase end end 5'b0_0101: begin @@ -430,8 +450,14 @@ module ibex_decoder #( end end 5'b0_0001: begin - if (instr[26] == 1'b0) begin // unshfl - illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1; + if (instr[26] == 1'b0) begin + if ((RV32B == RV32BOTEarlGrey) || (RV32B == RV32BFull)) begin + illegal_insn = 1'b0; // unshfl + end else if (RV32Zk != RV32ZkNone) begin + illegal_insn = (instr[25:20] == 6'b00_1111) ? 1'b0 : 1'b1; //unzip + end else begin + illegal_insn = 1'b1; + end end else begin illegal_insn = 1'b1; end @@ -452,6 +478,10 @@ module ibex_decoder #( rf_we = 1'b1; if ({instr[26], instr[13:12]} == {1'b1, 2'b01}) begin illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // cmix / cmov / fsl / fsr + end else if ({instr[29:28],instr[25], instr[14:12]} == {3'b10__1, 3'b000}) begin + illegal_insn = (RV32Zk == RV32Zkn) ? 1'b0 : 1'b1; // aes32ds/es aes32dsm/esm + end else if ({instr[29:27],instr[25], instr[14:12]} == {4'b110_0, 3'b000}) begin + illegal_insn = (RV32Zk == RV32Zks) ? 1'b0 : 1'b1; // sm4ed / sm4ks end else begin unique case ({instr[31:25], instr[14:12]}) // RV32I ALU operations @@ -469,20 +499,23 @@ module ibex_decoder #( // RV32B zba {7'b001_0000, 3'b010}, // sh1add {7'b001_0000, 3'b100}, // sh2add - {7'b001_0000, 3'b110}, // sh3add - // RV32B zbb + {7'b001_0000, 3'b110}: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // sh3add + // RV32B zbb and RV32Zk {7'b010_0000, 3'b111}, // andn {7'b010_0000, 3'b110}, // orn {7'b010_0000, 3'b100}, // xnor {7'b011_0000, 3'b001}, // rol {7'b011_0000, 3'b101}, // ror + {7'b000_0100, 3'b100}, // pack + {7'b000_0100, 3'b111}: illegal_insn = ((RV32B != RV32BNone ) || + (RV32Zk != RV32ZkNone)) ? 
1'b0 : 1'b1; // packh + {7'b010_0100, 3'b100}, // packu + {7'b001_0000, 3'b001}, // slo + {7'b001_0000, 3'b101}, // sro {7'b000_0101, 3'b100}, // min {7'b000_0101, 3'b110}, // max {7'b000_0101, 3'b101}, // minu {7'b000_0101, 3'b111}, // maxu - {7'b000_0100, 3'b100}, // pack - {7'b010_0100, 3'b100}, // packu - {7'b000_0100, 3'b111}, // packh // RV32B zbs {7'b010_0100, 3'b001}, // bclr {7'b001_0100, 3'b001}, // bset @@ -495,20 +528,33 @@ module ibex_decoder #( {7'b001_0100, 3'b101}, // gorc {7'b000_0100, 3'b001}, // shfl {7'b000_0100, 3'b101}, // unshfl - {7'b001_0100, 3'b010}, // xperm.n - {7'b001_0100, 3'b100}, // xperm.b - {7'b001_0100, 3'b110}, // xperm.h - {7'b001_0000, 3'b001}, // slo - {7'b001_0000, 3'b101}, // sro + {7'b001_0100, 3'b110}: begin // xperm.h + illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1; + end + // RV32B zbp & RV32Zk zbk + {7'b001_0100, 3'b010}, // xperm.n/xperm4 + {7'b001_0100, 3'b100}: illegal_insn = ((RV32B == RV32BFull ) || // xperm.b/xperm8 + (RV32B == RV32BOTEarlGrey) || + (RV32Zk != RV32ZkNone)) ? 1'b0 : 1'b1; // RV32B zbc {7'b000_0101, 3'b001}, // clmul - {7'b000_0101, 3'b010}, // clmulr - {7'b000_0101, 3'b011}: begin // clmulh + {7'b000_0101, 3'b011}: illegal_insn = ((RV32B == RV32BFull ) || + (RV32B == RV32BOTEarlGrey) || + (RV32Zk != RV32ZkNone)) ? 1'b0 : 1'b1; // clmulh + // RV32B zbc + {7'b000_0101, 3'b010}: begin // clmulr illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1; end // RV32B zbe {7'b010_0100, 3'b110}, // bdecompress {7'b000_0100, 3'b110}: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // bcompress + // RV32Zk zkh + {7'b010_1000, 3'b000}, // sha512_sum0r + {7'b010_1001, 3'b000}, // sha512_sum1r + {7'b010_1010, 3'b000}, // sha512_sig0l + {7'b010_1011, 3'b000}, // sha512_sig1l + {7'b010_1110, 3'b000}, // sha512_sig0h + {7'b010_1111, 3'b000}: illegal_insn = (RV32Zk == RV32Zkn) ? 
1'b0 : 1'b1; // sha512_sig1h // RV32M instructions {7'b000_0001, 3'b000}: begin // mul @@ -832,6 +875,7 @@ module ibex_decoder #( if (RV32B != RV32BNone) begin unique case (instr_alu[31:27]) 5'b0_0000: alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate + // Shift Left Ones by Immediate 5'b0_0100: begin if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_SLO; @@ -839,6 +883,7 @@ module ibex_decoder #( 5'b0_1001: alu_operator_o = ALU_BCLR; // Clear bit specified by immediate 5'b0_0101: alu_operator_o = ALU_BSET; // Set bit specified by immediate 5'b0_1101: alu_operator_o = ALU_BINV; // Invert bit specified by immediate. + // Shuffle with Immediate Control Value 5'b0_0001: if (instr_alu[26] == 0) alu_operator_o = ALU_SHFL; 5'b0_1100: begin @@ -890,6 +935,28 @@ module ibex_decoder #( default: ; endcase + end else if (RV32Zk != RV32ZkNone) begin + unique case (instr_alu[31:27]) + 5'b0_0001: if (instr_alu[26:20] == 7'b000_1111) alu_operator_o = ZKB_ZIP; // zip + 5'b0_0010: begin // zkn, zks + unique case (instr_alu[26:20]) + // sha256sum0 + 7'b000_0000: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SUM0; + // sha256sum1 + 7'b000_0001: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SUM1; + // sha256sig0 + 7'b000_0010: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SIG0; + // sha256sig1 + 7'b000_0011: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SIG1; + // sm3p0 + 7'b000_1000: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM3P0; + // sm3p1 + 7'b000_1001: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM3P1; + default: alu_operator_o = ALU_SLL; + endcase + end + default: alu_operator_o = ALU_SLL; + endcase end else begin alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate end @@ -929,16 +996,25 @@ module ibex_decoder #( default: ; endcase end end else begin - if (instr_alu[31:27] == 5'b0_0000) begin - alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate - end else if (instr_alu[31:27] == 5'b0_1000) begin - 
alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate - end + unique case (instr_alu[31:27]) + 5'b0_0000: alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate + 5'b0_1000: alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate + 5'b0_1100: if (RV32Zk != RV32ZkNone) begin + if (instr_alu[26] == 1'b0) alu_operator_o = ZKB_RORI; // zbkb_rori + end + 5'b0_1101: if (RV32Zk != RV32ZkNone) begin + if (instr_alu[26:20] == 7'b000_0111) alu_operator_o = ZKB_BREV8; // zbkb_brev8 + if (instr_alu[26:20] == 7'b001_1000) alu_operator_o = ZKB_REV8; // zbkb_rev8 + end + 5'b0_0001: if (RV32Zk != RV32ZkNone) begin + if (instr_alu[26:20] == 7'b000_1111) alu_operator_o = ZKB_UNZIP; // zbkb_unzip + end + default: ; + endcase end end - default: ; endcase end @@ -947,7 +1022,7 @@ module ibex_decoder #( alu_op_a_mux_sel_o = OP_A_REG_A; alu_op_b_mux_sel_o = OP_B_REG_B; - if (instr_alu[26]) begin + if ({instr_alu[26], instr_alu[13:12]} == {1'b1, 2'b01}) begin if (RV32B != RV32BNone) begin unique case ({instr_alu[26:25], instr_alu[14:12]}) {2'b11, 3'b001}: begin @@ -1008,12 +1083,16 @@ module ibex_decoder #( if (RV32B != RV32BNone) begin alu_operator_o = ALU_ROL; alu_multicycle_o = 1'b1; + end else if (RV32Zk != RV32ZkNone) begin + alu_operator_o = ZKB_ROL; // zbk_rol end end {7'b011_0000, 3'b101}: begin if (RV32B != RV32BNone) begin alu_operator_o = ALU_ROR; alu_multicycle_o = 1'b1; + end else if (RV32Zk != RV32ZkNone) begin + alu_operator_o = ZKB_ROR; // zbk_ror end end @@ -1022,14 +1101,28 @@ module ibex_decoder #( {7'b000_0101, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_MINU; {7'b000_0101, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_MAXU; - {7'b000_0100, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACK; - {7'b010_0100, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACKU; - {7'b000_0100, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACKH; - - {7'b010_0000, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = 
ALU_XNOR; - {7'b010_0000, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_ORN; - {7'b010_0000, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_ANDN; + {7'b000_0100, 3'b100}: begin + if (RV32B != RV32BNone ) alu_operator_o = ALU_PACK; // pack + else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_PACK; // pack + end + {7'b010_0100, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACKU; // packu + {7'b000_0100, 3'b111}: begin + if (RV32B != RV32BNone ) alu_operator_o = ALU_PACKH; // packh + else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_PACKH; // packh + end + {7'b010_0000, 3'b100}: begin + if (RV32B != RV32BNone ) alu_operator_o = ALU_XNOR; // xnor + else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_XNOR; // xnor + end + {7'b010_0000, 3'b110}: begin + if (RV32B != RV32BNone ) alu_operator_o = ALU_ORN; // orn + else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_ORN; // orn + end + {7'b010_0000, 3'b111}: begin + if (RV32B != RV32BNone ) alu_operator_o = ALU_ANDN; // andn + else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_ANDN; // andn + end // RV32B zba {7'b001_0000, 3'b010}: if (RV32B != RV32BNone) alu_operator_o = ALU_SH1ADD; {7'b001_0000, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_SH2ADD; @@ -1053,31 +1146,35 @@ module ibex_decoder #( {7'b000_0100, 3'b101}: begin if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_UNSHFL; end - {7'b001_0100, 3'b010}: begin - if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_XPERM_N; + {7'b001_0000, 3'b001}: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_SLO; end - {7'b001_0100, 3'b100}: begin - if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_XPERM_B; + {7'b001_0000, 3'b101}: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_SRO; end {7'b001_0100, 3'b110}: begin if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_XPERM_H; end - {7'b001_0000, 
3'b001}: begin - if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_SLO; + // RV32B zbp & RV32K zkb + {7'b001_0100, 3'b010}: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_XPERM_N; + else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_XPERM4; // xperm4 end - {7'b001_0000, 3'b101}: begin - if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_SRO; + {7'b001_0100, 3'b100}: begin + if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_XPERM_B; + else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_XPERM8; // xperm8 end - - // RV32B zbc + // RV32B zbc & RV32K zkb {7'b000_0101, 3'b001}: begin if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_CLMUL; + else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_CLMUL; // clmul end {7'b000_0101, 3'b010}: begin if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_CLMULR; end {7'b000_0101, 3'b011}: begin if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_CLMULH; + else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_CLMULH; // clmulh end // RV32B zbe @@ -1094,6 +1191,54 @@ module ibex_decoder #( end end + // RV32Zk zkh + // sha512_sum0r + {7'b010_1000, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SUM0R; + // sha512_sum1r + {7'b010_1001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SUM1R; + // sha512_sig0l + {7'b010_1010, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG0L; + // sha512_sig1l + {7'b010_1011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG1L; + // sha512_sig0h + {7'b010_1110, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG0H; + // sha512_sig1h + {7'b010_1111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG1H; + + // RV32Zk zkned + // aes32es + {7'b001_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB0; + {7'b011_0001, 3'b000}: if (RV32Zk == RV32Zkn) 
alu_operator_o = ZKN_AES32ESB1; + {7'b101_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB2; + {7'b111_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB3; + // aes32esm + {7'b001_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB0; + {7'b011_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB1; + {7'b101_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB2; + {7'b111_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB3; + // aes32dsb0 + {7'b001_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB0; + {7'b011_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB1; + {7'b101_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB2; + {7'b111_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB3; + // aes32dsmb0 + {7'b001_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB0; + {7'b011_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB1; + {7'b101_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB2; + {7'b111_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB3; + + // RV32Zk zks + // sm4edb0 + {7'b001_1000, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4EDB0; + {7'b011_1000, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4EDB1; + {7'b101_1000, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4EDB2; + {7'b111_1000, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4EDB3; + // sm4ksb0 + {7'b001_1010, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4KSB0; + {7'b011_1010, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4KSB1; + {7'b101_1010, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4KSB2; + {7'b111_1010, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4KSB3; + // RV32M instructions, all use the same ALU operation {7'b000_0001, 3'b000}: begin // mul alu_operator_o = ALU_ADD; diff 
--git a/rtl/ibex_ex_block.sv b/rtl/ibex_ex_block.sv index ee900164b9..a7ad75532a 100644 --- a/rtl/ibex_ex_block.sv +++ b/rtl/ibex_ex_block.sv @@ -9,9 +9,10 @@ * Execution block: Hosts ALU and MUL/DIV unit */ module ibex_ex_block #( - parameter ibex_pkg::rv32m_e RV32M = ibex_pkg::RV32MFast, - parameter ibex_pkg::rv32b_e RV32B = ibex_pkg::RV32BNone, - parameter bit BranchTargetALU = 0 + parameter ibex_pkg::rv32m_e RV32M = ibex_pkg::RV32MFast, + parameter ibex_pkg::rv32b_e RV32B = ibex_pkg::RV32BNone, + parameter ibex_pkg::rv32zk_e RV32Zk = ibex_pkg::RV32ZkNone, + parameter bit BranchTargetALU = 0 ) ( input logic clk_i, input logic rst_ni, @@ -55,11 +56,12 @@ module ibex_ex_block #( import ibex_pkg::*; - logic [31:0] alu_result, multdiv_result; + logic [31:0] alu_result, zke_result, multdiv_result; logic [32:0] multdiv_alu_operand_b, multdiv_alu_operand_a; logic [33:0] alu_adder_result_ext; logic alu_cmp_result, alu_is_equal_result; + logic zke_val; logic multdiv_valid; logic multdiv_sel; logic [31:0] alu_imd_val_q[2]; @@ -86,7 +88,7 @@ module ibex_ex_block #( assign alu_imd_val_q = '{imd_val_q_i[0][31:0], imd_val_q_i[1][31:0]}; - assign result_ex_o = multdiv_sel ? multdiv_result : alu_result; + assign result_ex_o = multdiv_sel ? multdiv_result : (zke_val ? 
zke_result : alu_result); // branch handling assign branch_decision_o = alu_cmp_result; @@ -133,6 +135,24 @@ module ibex_ex_block #( .is_equal_result_o (alu_is_equal_result) ); + ////////////////// + // Zk Extension // + ////////////////// + if (RV32Zk != RV32ZkNone) begin : gen_Zkn + ibex_zk #( + .RV32Zk(RV32Zk) + ) zkn_i ( + .operator_i (alu_operator_i), + .operand_a_i (alu_operand_a_i), + .operand_b_i (alu_operand_b_i), + .result_o (zke_result), + .zk_val_o (zke_val) + ); + end else begin : gen_no_Zkn + assign zke_result = 32'd0; + assign zke_val = 1'b0; + end + //////////////// // Multiplier // //////////////// diff --git a/rtl/ibex_id_stage.sv b/rtl/ibex_id_stage.sv index 2e795737ca..0bb9880dcd 100644 --- a/rtl/ibex_id_stage.sv +++ b/rtl/ibex_id_stage.sv @@ -18,13 +18,14 @@ `include "dv_fcov_macros.svh" module ibex_id_stage #( - parameter bit RV32E = 0, - parameter ibex_pkg::rv32m_e RV32M = ibex_pkg::RV32MFast, - parameter ibex_pkg::rv32b_e RV32B = ibex_pkg::RV32BNone, - parameter bit DataIndTiming = 1'b0, - parameter bit BranchTargetALU = 0, - parameter bit WritebackStage = 0, - parameter bit BranchPredictor = 0 + parameter bit RV32E = 0, + parameter ibex_pkg::rv32m_e RV32M = ibex_pkg::RV32MFast, + parameter ibex_pkg::rv32b_e RV32B = ibex_pkg::RV32BNone, + parameter ibex_pkg::rv32zk_e RV32Zk = ibex_pkg::RV32ZkNone, + parameter bit DataIndTiming = 1'b0, + parameter bit BranchTargetALU = 0, + parameter bit WritebackStage = 0, + parameter bit BranchPredictor = 0 ) ( input logic clk_i, input logic rst_ni, @@ -423,6 +424,7 @@ module ibex_id_stage #( .RV32E (RV32E), .RV32M (RV32M), .RV32B (RV32B), + .RV32Zk (RV32Zk), .BranchTargetALU(BranchTargetALU) ) decoder_i ( .clk_i (clk_i), diff --git a/rtl/ibex_lockstep.sv b/rtl/ibex_lockstep.sv index 3d1e20f099..846b2d04eb 100644 --- a/rtl/ibex_lockstep.sv +++ b/rtl/ibex_lockstep.sv @@ -16,6 +16,7 @@ module ibex_lockstep import ibex_pkg::*; #( parameter bit RV32E = 1'b0, parameter rv32m_e RV32M = RV32MFast, parameter 
rv32b_e RV32B = RV32BNone, + parameter rv32zk_e RV32K = RV32ZkNone, parameter bit BranchTargetALU = 1'b0, parameter bit WritebackStage = 1'b0, parameter bit ICache = 1'b0, @@ -323,6 +324,7 @@ module ibex_lockstep import ibex_pkg::*; #( .RV32E ( RV32E ), .RV32M ( RV32M ), .RV32B ( RV32B ), + .RV32K ( RV32K ), .BranchTargetALU ( BranchTargetALU ), .ICache ( ICache ), .ICacheECC ( ICacheECC ), diff --git a/rtl/ibex_pkg.sv b/rtl/ibex_pkg.sv index c6aac9cb21..c30b2d56b3 100644 --- a/rtl/ibex_pkg.sv +++ b/rtl/ibex_pkg.sv @@ -51,6 +51,13 @@ package ibex_pkg; RV32BFull = 3 } rv32b_e; + typedef enum integer { + RV32ZkNone = 0, + RV32Zbkb = 1, + RV32Zkn = 2, + RV32Zks = 3 + } rv32zk_e; + ///////////// // Opcodes // ///////////// @@ -180,7 +187,65 @@ package ibex_pkg; ALU_CRC32_H, ALU_CRC32C_H, ALU_CRC32_W, - ALU_CRC32C_W + ALU_CRC32C_W, + + // Zbkb + ZKB_RORI, + ZKB_BREV8, + ZKB_REV8, + ZKB_ZIP, + ZKB_UNZIP, + ZKB_ROR, + ZKB_ROL, + ZKB_ANDN, + ZKB_ORN, + ZKB_XNOR, + ZKB_PACK, + ZKB_PACKH, + ZKB_CLMUL, + ZKB_CLMULH, + ZKB_XPERM8, + ZKB_XPERM4, + + // Zkn + ZKN_SHA256SUM0, + ZKN_SHA256SUM1, + ZKN_SHA256SIG0, + ZKN_SHA256SIG1, + ZKN_SHA512SUM0R, + ZKN_SHA512SUM1R, + ZKN_SHA512SIG0L, + ZKN_SHA512SIG1L, + ZKN_SHA512SIG0H, + ZKN_SHA512SIG1H, + ZKN_AES32DSB0, + ZKN_AES32DSB1, + ZKN_AES32DSB2, + ZKN_AES32DSB3, + ZKN_AES32DSMB0, + ZKN_AES32DSMB1, + ZKN_AES32DSMB2, + ZKN_AES32DSMB3, + ZKN_AES32ESB0, + ZKN_AES32ESB1, + ZKN_AES32ESB2, + ZKN_AES32ESB3, + ZKN_AES32ESMB0, + ZKN_AES32ESMB1, + ZKN_AES32ESMB2, + ZKN_AES32ESMB3, + + //Zks + ZKS_SM4EDB0, + ZKS_SM4EDB1, + ZKS_SM4EDB2, + ZKS_SM4EDB3, + ZKS_SM4KSB0, + ZKS_SM4KSB1, + ZKS_SM4KSB2, + ZKS_SM4KSB3, + ZKS_SM3P0, + ZKS_SM3P1 } alu_op_e; typedef enum logic [1:0] { @@ -191,7 +256,6 @@ package ibex_pkg; MD_OP_REM } md_op_e; - ////////////////////////////////// // Control and status registers // ////////////////////////////////// diff --git a/rtl/ibex_poly16_mul.sv b/rtl/ibex_poly16_mul.sv new file mode 100644 index 0000000000..6a3329dae4 
--- /dev/null +++ b/rtl/ibex_poly16_mul.sv @@ -0,0 +1,411 @@ +// Copyright lowRISC contributors. +// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +/** + * 16-bit Polynomial Multiplier unit + * The implementation follows the circuit optimisation introduced by the NIST circuit complexity team [1] + * + * [1] https://github.com/usnistgov/Circuits/ + */ +module ibex_poly16_mul ( +input logic [15:0] a, +input logic [15:0] b, +output logic [31:0] r +); + + logic t1, t2, t3, t4, t5, t6, t7, t8, t9 ; + logic t10, t11, t12, t13, t14, t15, t16, t17, t18, t19; + logic t20, t21, t22, t23, t24, t25, t26, t27, t28, t29; + logic t30, t31, t32, t33, t34, t35, t36, t37, t38, t39; + logic t40, t41, t42, t43, t44, t45, t46, t47, t48, t49; + logic t50, t51, t52, t53, t54, t55, t56, t57, t58, t59; + logic t60, t61, t62, t63, t64, t65, t66, t67, t68, t69; + logic t70, t71, t72, t73, t74, t75, t76, t77, t78, t79; + logic t80, t81, t82, t83, t84, t85, t86, t87, t88, t89; + logic t90, t91, t92, t93, t94, t95, t96, t97, t98, t99; + + logic t100, t101, t102, t103, t104, t105, t106, t107, t108, t109; + logic t110, t111, t112, t113, t114, t115, t116, t117, t118, t119; + logic t120, t121, t122, t123, t124, t125, t126, t127, t128, t129; + logic t130, t131, t132, t133, t134, t135, t136, t137, t138, t139; + logic t140, t141, t142, t143, t144, t145, t146, t147, t148, t149; + logic t150, t151, t152, t153, t154, t155, t156, t157, t158, t159; + logic t160, t161, t162, t163, t164, t165, t166, t167, t168, t169; + logic t170, t171, t172, t173, t174, t175, t176, t177, t178, t179; + logic t180, t181, t182, t183, t184, t185, t186, t187, t188, t189; + logic t190, t191, t192, t193, t194, t195, t196, t197, t198, t199; + + logic t200, t201, t202, t203, t204, t205, t206, t207, t208, t209; + logic t210, t211, t212, t213, t214, t215, t216, t217, t218, t219; + logic
t220, t221, t222, t223, t224, t225, t226, t227, t228, t229; + logic t230, t231, t232, t233, t234, t235, t236, t237, t238, t239; + logic t240, t241, t242, t243, t244, t245, t246, t247, t248, t249; + logic t250, t251, t252, t253, t254, t255, t256, t257, t258, t259; + logic t260, t261, t262, t263, t264, t265, t266, t267, t268, t269; + logic t270, t271, t272, t273, t274, t275, t276, t277, t278, t279; + logic t280, t281, t282, t283, t284, t285, t286, t287, t288, t289; + logic t290, t291, t292, t293, t294, t295, t296, t297, t298, t299; + + logic t300, t301, t302, t303, t304, t305, t306, t307, t308, t309; + logic t310, t311, t312, t313, t314, t315, t316, t317, t318; + + logic z0, z1, z2, z3, z4, z5, z6, z7, z8, z9; + logic z10, z11, z12, z13, z14, z15, z16, z17, z18, z19; + logic z20, z21, z22, z23, z24, z25, z26, z27, z28, z29, z30; + + assign z30 = a[15] & b[15]; + assign t1 = a[15] & b[12]; + assign t2 = a[15] & b[13]; + assign t3 = a[15] & b[14]; + assign t4 = a[12] & b[15]; + assign t5 = a[13] & b[15]; + assign t6 = a[14] & b[15]; + assign t7 = a[14] & b[14]; + assign t8 = a[14] & b[12]; + assign t9 = a[14] & b[13]; + assign t10 = a[12] & b[14]; + assign t11 = a[13] & b[14]; + assign t12 = a[13] & b[13]; + assign t13 = a[13] & b[12]; + assign t14 = a[12] & b[13]; + assign t15 = a[12] & b[12]; + assign t16 = a[11] & b[11]; + assign t17 = a[11] & b[ 8]; + assign t18 = a[11] & b[ 9]; + assign t19 = a[11] & b[10]; + assign t20 = a[ 8] & b[11]; + assign t21 = a[ 9] & b[11]; + assign t22 = a[10] & b[11]; + assign t23 = a[10] & b[10]; + assign t24 = a[10] & b[ 8]; + assign t25 = a[10] & b[ 9]; + assign t26 = a[ 8] & b[10]; + assign t27 = a[ 9] & b[10]; + assign t28 = a[ 9] & b[ 9]; + assign t29 = a[ 9] & b[ 8]; + assign t30 = a[ 8] & b[ 9]; + assign t31 = a[ 8] & b[ 8]; + assign t32 = a[ 7] & b[ 7]; + assign t33 = a[ 7] & b[ 4]; + assign t34 = a[ 7] & b[ 5]; + assign t35 = a[ 7] & b[ 6]; + assign t36 = a[ 4] & b[ 7]; + assign t37 = a[ 5] & b[ 7]; + assign t38 = a[ 6] & b[ 
7]; + assign t39 = a[ 6] & b[ 6]; + assign t40 = a[ 6] & b[ 4]; + assign t41 = a[ 6] & b[ 5]; + assign t42 = a[ 4] & b[ 6]; + assign t43 = a[ 5] & b[ 6]; + assign t44 = a[ 5] & b[ 5]; + assign t45 = a[ 5] & b[ 4]; + assign t46 = a[ 4] & b[ 5]; + assign t47 = a[ 4] & b[ 4]; + assign t48 = a[ 3] & b[ 3]; + assign t49 = a[ 3] & b[ 0]; + assign t50 = a[ 3] & b[ 1]; + assign t51 = a[ 3] & b[ 2]; + assign t52 = a[ 0] & b[ 3]; + assign t53 = a[ 1] & b[ 3]; + assign t54 = a[ 2] & b[ 3]; + assign t55 = a[ 2] & b[ 2]; + assign t56 = a[ 2] & b[ 0]; + assign t57 = a[ 2] & b[ 1]; + assign t58 = a[ 0] & b[ 2]; + assign t59 = a[ 1] & b[ 2]; + assign t60 = a[ 1] & b[ 1]; + assign t61 = a[ 1] & b[ 0]; + assign t62 = a[ 0] & b[ 1]; + assign z0 = a[ 0] & b[ 0]; + assign t63 = b[ 8] ^ b[12]; + assign t64 = b[ 9] ^ b[13]; + assign t65 = b[10] ^ b[14]; + assign t66 = b[11] ^ b[15]; + assign t67 = a[ 8] ^ a[12]; + assign t68 = a[ 9] ^ a[13]; + assign t69 = a[10] ^ a[14]; + assign t70 = a[11] ^ a[15]; + assign t71 = t70 & t66; + assign t72 = t70 & t63; + assign t73 = t70 & t64; + assign t74 = t70 & t65; + assign t75 = t67 & t66; + assign t76 = t68 & t66; + assign t77 = t69 & t66; + assign t78 = t69 & t65; + assign t79 = t69 & t63; + assign t80 = t69 & t64; + assign t81 = t67 & t65; + assign t82 = t68 & t65; + assign t83 = t68 & t64; + assign t84 = t68 & t63; + assign t85 = t67 & t64; + assign t86 = t67 & t63; + assign t87 = b[0] ^ b[ 4]; + assign t88 = b[1] ^ b[ 5]; + assign t89 = b[2] ^ b[ 6]; + assign t90 = b[3] ^ b[ 7]; + assign t91 = a[0] ^ a[ 4]; + assign t92 = a[1] ^ a[ 5]; + assign t93 = a[2] ^ a[ 6]; + assign t94 = a[3] ^ a[ 7]; + assign t95 = t94 & t90; + assign t96 = t94 & t87; + assign t97 = t94 & t88; + assign t98 = t94 & t89; + assign t99 = t91 & t90; + + assign t100 = t92 & t90; + assign t101 = t93 & t90; + assign t102 = t93 & t89; + assign t103 = t93 & t87; + assign t104 = t93 & t88; + assign t105 = t91 & t89; + assign t106 = t92 & t89; + assign t107 = t92 & t88; + assign 
t108 = t92 & t87; + assign t109 = t91 & t88; + assign t110 = t91 & t87; + assign t111 = b[4] ^ b[12]; + assign t112 = b[5] ^ b[13]; + assign t113 = b[6] ^ b[14]; + assign t114 = b[7] ^ b[15]; + assign t115 = b[0] ^ b[ 8]; + assign t116 = b[1] ^ b[ 9]; + assign t117 = b[2] ^ b[10]; + assign t118 = b[3] ^ b[11]; + assign t119 = a[4] ^ a[12]; + assign t120 = a[5] ^ a[13]; + assign t121 = a[6] ^ a[14]; + assign t122 = a[7] ^ a[15]; + assign t123 = a[0] ^ a[ 8]; + assign t124 = a[1] ^ a[ 9]; + assign t125 = a[2] ^ a[10]; + assign t126 = a[3] ^ a[11]; + assign t127 = t126 & t118; + assign t128 = t126 & t115; + assign t129 = t126 & t116; + assign t130 = t126 & t117; + assign t131 = t123 & t118; + assign t132 = t124 & t118; + assign t133 = t125 & t118; + assign t134 = t125 & t117; + assign t135 = t125 & t115; + assign t136 = t125 & t116; + assign t137 = t123 & t117; + assign t138 = t124 & t117; + assign t139 = t124 & t116; + assign t140 = t124 & t115; + assign t141 = t123 & t116; + assign t142 = t123 & t115; + assign t143 = t122 & t114; + assign t144 = t122 & t111; + assign t145 = t122 & t112; + assign t146 = t122 & t113; + assign t147 = t119 & t114; + assign t148 = t120 & t114; + assign t149 = t121 & t114; + assign t150 = t121 & t113; + assign t151 = t121 & t111; + assign t152 = t121 & t112; + assign t153 = t119 & t113; + assign t154 = t120 & t113; + assign t155 = t120 & t112; + assign t156 = t120 & t111; + assign t157 = t119 & t112; + assign t158 = t119 & t111; + assign t159 = t115 ^ t111; + assign t160 = t116 ^ t112; + assign t161 = t117 ^ t113; + assign t162 = t118 ^ t114; + assign t163 = t123 ^ t119; + assign t164 = t124 ^ t120; + assign t165 = t125 ^ t121; + assign t166 = t126 ^ t122; + assign t167 = t166 & t162; + assign t168 = t166 & t159; + assign t169 = t166 & t160; + assign t170 = t166 & t161; + assign t171 = t163 & t162; + assign t172 = t164 & t162; + assign t173 = t165 & t162; + assign t174 = t165 & t161; + assign t175 = t165 & t159; + assign t176 = t165 & 
t160; + assign t177 = t163 & t161; + assign t178 = t164 & t161; + assign t179 = t164 & t160; + assign t180 = t164 & t159; + assign t181 = t163 & t160; + assign t182 = t163 & t159; + assign t183 = t73 ^ t76; + assign t184 = t97 ^ t100; + assign t185 = t15 ^ t18; + assign t186 = t129 ^ t132; + assign t187 = t134 ^ t158; + assign t188 = t145 ^ t148; + assign t189 = t169 ^ t172; + assign t190 = t2 ^ t5; + assign t191 = t21 ^ t23; + assign t192 = t31 ^ t34; + assign t193 = t37 ^ t39; + assign t194 = t47 ^ t50; + assign t195 = t53 ^ t55; + assign t196 = t183 ^ t78; + assign t197 = t192 ^ t193; + assign t198 = t194 ^ t195; + assign t199 = t184 ^ t102; + assign t200 = t185 ^ t191; + assign t201 = t186 ^ t187; + assign t202 = t188 ^ t150; + assign t203 = t189 ^ t174; + assign z28 = t190 ^ t7; + assign t204 = t198 ^ z0; + assign z4 = t110 ^ t204; + assign t205 = t200 ^ z28; + assign z24 = t196 ^ t205; + assign t206 = t197 ^ t199; + assign t207 = t197 ^ t86; + assign t208 = t202 ^ t205; + assign z20 = t207 ^ t208; + assign t209 = t142 ^ t204; + assign z8 = t206 ^ t209; + assign t210 = t196 ^ t198; + assign t211 = t201 ^ t206; + assign t212 = t208 ^ t210; + assign t213 = t211 ^ t212; + assign t214 = t200 ^ t201; + assign t215 = t110 ^ t182; + assign t216 = t209 ^ t214; + assign t217 = t215 ^ t207; + assign z12 = t217 ^ t216; + assign z16 = t213 ^ t203; + assign t218 = t74 ^ t77; + assign t219 = t84 ^ t85; + assign t220 = t13 ^ t14; + assign t221 = t98 ^ t101; + assign t222 = t108 ^ t109; + assign t223 = t130 ^ t133; + assign t224 = t140 ^ t141; + assign t225 = t146 ^ t149; + assign t226 = t156 ^ t157; + assign t227 = t170 ^ t173; + assign t228 = t19 ^ t22; + assign t229 = t180 ^ t181; + assign t230 = t29 ^ t30; + assign z29 = t3 ^ t6; + assign t231 = t35 ^ t38; + assign t232 = t45 ^ t46; + assign t233 = t51 ^ t54; + assign z1 = t61 ^ t62; + assign t234 = t228 ^ t220; + assign t235 = t230 ^ t231; + assign t236 = t232 ^ t233; + assign t237 = t223 ^ t226; + assign t238 = z29 ^ 
t234; + assign z25 = t218 ^ t238; + assign t239 = z1 ^ t236; + assign z5 = t222 ^ t239; + assign t240 = t219 ^ t235; + assign t241 = t235 ^ t221; + assign t242 = t224 ^ t239; + assign z9 = t241 ^ t242; + assign t243 = t225 ^ t238; + assign z21 = t240 ^ t243; + assign t244 = t218 ^ t236; + assign t245 = t237 ^ t241; + assign t246 = t243 ^ t244; + assign t247 = t245 ^ t227; + assign t248 = t234 ^ t237; + assign t249 = t222 ^ t240; + assign t250 = t242 ^ t248; + assign t251 = t249 ^ t229; + assign z17 = t247 ^ t246; + assign z13 = t251 ^ t250; + assign t252 = t10 ^ t12; + assign t253 = t79 ^ t81; + assign t254 = t103 ^ t105; + assign t255 = t127 ^ t151; + assign t256 = t135 ^ t137; + assign t257 = t153 ^ t155; + assign t258 = t16 ^ t8; + assign t259 = t175 ^ t177; + assign t260 = t24 ^ t26; + assign t261 = t28 ^ t32; + assign t262 = t40 ^ t42; + assign t263 = t44 ^ t48; + assign t264 = t56 ^ t58; + assign t265 = t252 ^ t258; + assign t266 = t261 ^ t260; + assign t267 = t262 ^ t263; + assign z2 = t264 ^ t60; + assign t268 = t253 ^ t83; + assign t269 = t254 ^ t107; + assign t270 = t255 ^ t257; + assign t271 = t256 ^ t139; + assign t272 = t259 ^ t179; + assign t273 = t265 ^ z30; + assign z26 = t71 ^ t273; + assign t274 = t267 ^ z2; + assign z6 = t269 ^ t274; + assign t275 = t266 ^ t268; + assign t276 = t266 ^ t95 ; + assign t277 = t271 ^ t274; + assign z10 = t276 ^ t277; + assign t278 = t143 ^ t273; + assign z22 = t275 ^ t278; + assign t279 = t265 ^ t269; + assign t280 = t270 ^ t275; + assign t281 = t277 ^ t279; + assign t282 = t280 ^ t281; + assign t283 = t267 ^ t270; + assign t284 = t71 ^ t167; + assign t285 = t278 ^ t283; + assign t286 = t284 ^ t276; + assign z14 = t282 ^ t272; + assign z18 = t286 ^ t285; + assign t287 = t9 ^ t11; + assign t288 = t72 ^ t75; + assign t289 = t80 ^ t82; + assign t290 = t96 ^ t99; + assign t291 = t104 ^ t106; + assign t292 = t1 ^ t4; + assign t293 = t128 ^ t131; + assign t294 = t136 ^ t138; + assign t295 = t144 ^ t147; + assign t296 = 
t152 ^ t154; + assign t297 = t17 ^ t20; + assign t298 = t168 ^ t171; + assign t299 = t176 ^ t178; + assign t300 = t25 ^ t27; + assign t301 = t33 ^ t36; + assign t302 = t41 ^ t43; + assign t303 = t49 ^ t52; + assign t304 = t57 ^ t59; + assign z27 = t287 ^ t292; + assign t305 = t296 ^ t295; + assign t306 = t297 ^ t300; + assign t307 = t298 ^ t299; + assign t308 = t301 ^ t302; + assign z3 = t303 ^ t304; + assign t309 = t288 ^ t289; + assign t310 = t290 ^ t291; + assign t311 = t293 ^ t294; + assign t312 = z27 ^ t306; + assign z23 = t309 ^ t312; + assign t313 = t308 ^ z3; + assign z7 = t310 ^ t313; + assign t314 = t305 ^ t308; + assign z19 = t312 ^ t314; + assign t315 = t306 ^ t311; + assign z11 = t313 ^ t315; + assign t316 = t305 ^ t311; + assign t317 = z23 ^ z7; + assign t318 = t316 ^ t307; + assign z15 = t318 ^ t317; + + assign r = {1'b0,z30,z29,z28,z27,z26,z25,z24,z23,z22,z21,z20,z19,z18,z17,z16, + z15,z14,z13,z12,z11,z10, z9, z8, z7, z6, z5, z4, z3, z2, z1,z0}; +endmodule diff --git a/rtl/ibex_sm4_sbox.sv b/rtl/ibex_sm4_sbox.sv new file mode 100644 index 0000000000..0d3d7ba6fa --- /dev/null +++ b/rtl/ibex_sm4_sbox.sv @@ -0,0 +1,198 @@ +// Copyright lowRISC contributors. +// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +/** + * SM4 Sbox unit + * This modified version is derived from the original implementation by Markku-Juhani O. Saarinen, + * which is based on the optimised AES structure proposed by Boyar & Peralta [BoPe12]. + * S-Boxes are broken into a nonlinear middle layer and two linear top and bottom layers. + * The two linear top and bottom layers are modified to adapt them to the SM4 cipher. + * + * [BoPe12] Boyar J., Peralta R. "A Small Depth-16 Circuit for the AES + * S-Box." Proc.SEC 2012. IFIP AICT 376. Springer, pp.
287-298 (2012) + * DOI: https://doi.org/10.1007/978-3-642-30436-1_24 + * Preprint: https://eprint.iacr.org/2011/332.pdf + */ +module ibex_sm4_sbox ( +input logic [7:0] in, +output logic [7:0] fx +); + +// sm4_sbox_top +function automatic logic [20:0] sm4_sbox_top(logic [7:0] x); + logic y20; + logic y19, y18, y17, y16, y15, y14, y13, y12, y11, y10; + logic y9, y8, y7, y6, y5, y4, y3, y2, y1, y0 ; + logic t6, t5, t4, t3, t2, t1, t0 ; + + y18 = x[ 2] ^ x[ 6]; + t0 = x[ 3] ^ x[ 4]; + t1 = x[ 2] ^ x[ 7]; + t2 = x[ 7] ^ y18 ; + t3 = x[ 1] ^ t1 ; + t4 = x[ 6] ^ x[ 7]; + t5 = x[ 0] ^ y18 ; + t6 = x[ 3] ^ x[ 6]; + y10 = x[ 1] ^ y18; + y0 = x[ 5] ^~ y10; + y1 = t0 ^ t3 ; + y2 = x[ 0] ^ t0 ; + y4 = x[ 0] ^ t3 ; + y3 = x[ 3] ^ y4 ; + y5 = x[ 5] ^ t5 ; + y6 = x[ 0] ^~ x[ 1]; + y7 = t0 ^~ y10; + y8 = t0 ^ t5 ; + y9 = x[ 3]; + y11 = t0 ^ t4 ; + y12 = x[ 5] ^ t4 ; + y13 = x[ 5] ^~ y1 ; + y14 = x[ 4] ^~ t2 ; + y15 = x[ 1] ^~ t6 ; + y16 = x[ 0] ^~ t2 ; + y17 = t0 ^~ t2 ; + y19 = x[ 5] ^~ y14; + y20 = x[ 0] ^ t1 ; + + return {y20, y19, y18, y17, y16, y15, y14, y13, y12, y11, + y10, y9, y8, y7, y6, y5, y4, y3, y2, y1, y0}; +endfunction + +// sm4_sbox_out +function automatic logic [7:0] sm4_sbox_out(logic [17:0] x); + logic [7:0] y; + logic t29, t28, t27, t26, t25, t24, t23, t22, t21, t20; + logic t19, t18, t17, t16, t15, t14, t13, t12, t11, t10; + logic t9, t8, t7, t6, t5, t4, t3, t2, t1, t0 ; + t0 = x[ 4] ^ x[ 7]; + t1 = x[13] ^ x[15]; + t2 = x[ 2] ^ x[16]; + t3 = x[ 6] ^ t0; + t4 = x[12] ^ t1; + t5 = x[ 9] ^ x[10]; + t6 = x[11] ^ t2; + t7 = x[ 1] ^ t4; + t8 = x[ 0] ^ x[17]; + t9 = x[ 3] ^ x[17]; + t10 = x[ 8] ^ t3; + t11 = t2 ^ t5; + t12 = x[14] ^ t6; + t13 = t7 ^ t9; + t14 = x[ 0] ^ x[ 6]; + t15 = x[ 7] ^ x[16]; + t16 = x[ 5] ^ x[13]; + t17 = x[ 3] ^ x[15]; + t18 = x[10] ^ x[12]; + t19 = x[ 9] ^ t1 ; + t20 = x[ 4] ^ t4 ; + t21 = x[14] ^ t3 ; + t22 = x[16] ^ t5 ; + t23 = t7 ^ t14; + t24 = t8 ^ t11; + t25 = t0 ^ t12; + t26 = t17 ^ t3 ; + t27 = t18 ^ t10; + t28 = t19 ^ t6 ; + t29 = 
t8 ^ t10; + y[0] = t11 ^~ t13; + y[1] = t15 ^~ t23; + y[2] = t20 ^ t24; + y[3] = t16 ^ t25; + y[4] = t26 ^~ t22; + y[5] = t21 ^ t13; + y[6] = t27 ^~ t12; + y[7] = t28 ^~ t29; + + return y; +endfunction + + +// sm4_sbox_inv_mid +function automatic logic [17:0] sm4_sbox_inv_mid(logic [20:0] x); + logic [17:0] y; + logic t45, t44, t43, t42, t41, t40; + logic t39, t38, t37, t36, t35, t34, t33, t32, t31, t30; + logic t29, t28, t27, t26, t25, t24, t23, t22, t21, t20; + logic t19, t18, t17, t16, t15, t14, t13, t12, t11, t10; + logic t9, t8, t7, t6, t5, t4, t3, t2, t1, t0 ; + t0 = x[ 3] ^ x[12]; + t1 = x[ 9] & x[ 5]; + t2 = x[17] & x[ 6]; + t3 = x[10] ^ t1 ; + t4 = x[14] & x[ 0]; + t5 = t4 ^ t1 ; + t6 = x[ 3] & x[12]; + t7 = x[16] & x[ 7]; + t8 = t0 ^ t6 ; + t9 = x[15] & x[13]; + t10 = t9 ^ t6 ; + t11 = x[ 1] & x[11]; + t12 = x[ 4] & x[20]; + t13 = t12 ^ t11 ; + t14 = x[ 2] & x[ 8]; + t15 = t14 ^ t11 ; + t16 = t3 ^ t2 ; + t17 = t5 ^ x[18]; + t18 = t8 ^ t7 ; + t19 = t10 ^ t15 ; + t20 = t16 ^ t13 ; + t21 = t17 ^ t15 ; + t22 = t18 ^ t13 ; + t23 = t19 ^ x[19]; + t24 = t22 ^ t23 ; + t25 = t22 & t20 ; + t26 = t21 ^ t25 ; + t27 = t20 ^ t21 ; + t28 = t23 ^ t25 ; + t29 = t28 & t27 ; + t30 = t26 & t24 ; + t31 = t20 & t23 ; + t32 = t27 & t31 ; + t33 = t27 ^ t25 ; + t34 = t21 & t22 ; + t35 = t24 & t34 ; + t36 = t24 ^ t25 ; + t37 = t21 ^ t29 ; + t38 = t32 ^ t33 ; + t39 = t23 ^ t30 ; + t40 = t35 ^ t36 ; + t41 = t38 ^ t40 ; + t42 = t37 ^ t39 ; + t43 = t37 ^ t38 ; + t44 = t39 ^ t40 ; + t45 = t42 ^ t41 ; + y[ 0] = t38 & x[ 7]; + y[ 1] = t37 & x[13]; + y[ 2] = t42 & x[11]; + y[ 3] = t45 & x[20]; + y[ 4] = t41 & x[ 8]; + y[ 5] = t44 & x[ 9]; + y[ 6] = t40 & x[17]; + y[ 7] = t39 & x[14]; + y[ 8] = t43 & x[ 3]; + y[ 9] = t38 & x[16]; + y[10] = t37 & x[15]; + y[11] = t42 & x[ 1]; + y[12] = t45 & x[ 4]; + y[13] = t41 & x[ 2]; + y[14] = t44 & x[ 5]; + y[15] = t40 & x[ 6]; + y[16] = t39 & x[ 0]; + y[17] = t43 & x[12]; + + return y; +endfunction + +logic [20:0] t1; +logic [17:0] t2; + +assign t1 = 
sm4_sbox_top(in); +assign t2 = sm4_sbox_inv_mid(t1); +assign fx = sm4_sbox_out(t2); + +endmodule + + + diff --git a/rtl/ibex_top.sv b/rtl/ibex_top.sv index 71bc1ba7be..2bce8d169b 100644 --- a/rtl/ibex_top.sv +++ b/rtl/ibex_top.sv @@ -21,6 +21,7 @@ module ibex_top import ibex_pkg::*; #( parameter bit RV32E = 1'b0, parameter rv32m_e RV32M = RV32MFast, parameter rv32b_e RV32B = RV32BNone, + parameter rv32zk_e RV32K = RV32ZkNone, parameter regfile_e RegFile = RegFileFF, parameter bit BranchTargetALU = 1'b0, parameter bit WritebackStage = 1'b0, @@ -195,6 +196,7 @@ module ibex_top import ibex_pkg::*; #( .RV32E (RV32E), .RV32M (RV32M), .RV32B (RV32B), + .RV32K (RV32K), .BranchTargetALU (BranchTargetALU), .ICache (ICache), .ICacheECC (ICacheECC), @@ -662,6 +664,7 @@ module ibex_top import ibex_pkg::*; #( .RV32E (RV32E), .RV32M (RV32M), .RV32B (RV32B), + .RV32K (RV32K), .BranchTargetALU (BranchTargetALU), .ICache (ICache), .ICacheECC (ICacheECC), diff --git a/rtl/ibex_top_tracing.sv b/rtl/ibex_top_tracing.sv index c08fc18443..736e4e5790 100644 --- a/rtl/ibex_top_tracing.sv +++ b/rtl/ibex_top_tracing.sv @@ -15,6 +15,7 @@ module ibex_top_tracing import ibex_pkg::*; #( parameter bit RV32E = 1'b0, parameter rv32m_e RV32M = RV32MFast, parameter rv32b_e RV32B = RV32BNone, + parameter rv32zk_e RV32K = RV32ZkNone, parameter regfile_e RegFile = RegFileFF, parameter bit BranchTargetALU = 1'b0, parameter bit WritebackStage = 1'b0, @@ -136,6 +137,7 @@ module ibex_top_tracing import ibex_pkg::*; #( .RV32E ( RV32E ), .RV32M ( RV32M ), .RV32B ( RV32B ), + .RV32K ( RV32K ), .RegFile ( RegFile ), .BranchTargetALU ( BranchTargetALU ), .ICache ( ICache ), diff --git a/rtl/ibex_zk.sv b/rtl/ibex_zk.sv new file mode 100644 index 0000000000..e62fada301 --- /dev/null +++ b/rtl/ibex_zk.sv @@ -0,0 +1,421 @@ +// Copyright lowRISC contributors. +// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+// SPDX-License-Identifier: Apache-2.0 + +/** + * Zk Extension unit: An implementation for the RISC-V Cryptography Extension. + */ +module ibex_zk #( + parameter ibex_pkg::rv32zk_e RV32Zk = ibex_pkg::RV32ZkNone +) ( + input ibex_pkg::alu_op_e operator_i, + input logic [31:0] operand_a_i, + input logic [31:0] operand_b_i, + + output logic [31:0] result_o, + output logic zk_val_o +); + import ibex_pkg::*; +`define RORI32(a,b) ((a >> b) | (a << 32-b)) +`define ROLI32(a,b) ((a << b) | (a >> 32-b)) +`define SRLI32(a,b) ((a >> b) ) +`define SLLI32(a,b) ((a << b) ) + +// 32-bit Barrel Right Rotation +function automatic logic [31:0] ror32(logic [31:0] x, logic [4:0] amt); + logic [31:0] ro, l8, l4, l2, l1, l0; + l0 = x; + l1 = ({32{amt[0]}} & {l0[ 0], l0[31: 1]}) | ({32{!amt[0]}} & l0[31:0]); + l2 = ({32{amt[1]}} & {l1[ 1:0], l1[31: 2]}) | ({32{!amt[1]}} & l1[31:0]); + l4 = ({32{amt[2]}} & {l2[ 3:0], l2[31: 4]}) | ({32{!amt[2]}} & l2[31:0]); + l8 = ({32{amt[3]}} & {l4[ 7:0], l4[31: 8]}) | ({32{!amt[3]}} & l4[31:0]); + ro = ({32{amt[4]}} & {l8[15:0], l8[31:16]}) | ({32{!amt[4]}} & l8[31:0]); + return ro; +endfunction + +// 32-bit Barrel Left Rotation +function automatic logic [31:0] rol32(logic [31:0] x, logic [4:0] amt); + logic [31:0] ro, l8, l4, l2, l1, l0; + l0 = x; + l1 = ({32{amt[0]}} & {l0[30:0], l0[31 ]}) | ({32{!amt[0]}} & l0[31:0]); + l2 = ({32{amt[1]}} & {l1[29:0], l1[31:30]}) | ({32{!amt[1]}} & l1[31:0]); + l4 = ({32{amt[2]}} & {l2[27:0], l2[31:28]}) | ({32{!amt[2]}} & l2[31:0]); + l8 = ({32{amt[3]}} & {l4[23:0], l4[31:24]}) | ({32{!amt[3]}} & l4[31:0]); + ro = ({32{amt[4]}} & {l8[15:0], l8[31:16]}) | ({32{!amt[4]}} & l8[31:0]); + return ro; +endfunction + +// Reverse the bit order within a byte +function automatic logic [7:0] rev8(logic [7:0] x); + logic [7:0] rb; + for (int i = 0; i < 8; i = i + 1) begin + rb[i] = x[8-i-1]; + end + return rb; +endfunction + +// 32-bit Zip +function automatic logic [31:0] zip32(logic [31:0] x); + logic [31:0] uz; + for (int i = 0; i < 16; i = i +
1) begin + uz[2*i ] = x[i]; + uz[2*i+1] = x[i+16]; + end + return uz; +endfunction + +// 32-bit UnZip +function automatic logic [31:0] unzip32(logic [31:0] x); + logic [15:0] zh, zl; + for (int i = 0; i < 16; i = i + 1) begin + zh[i] = x[2*i + 1]; + zl[i] = x[2*i ]; + end + return {zh, zl}; +endfunction + + +// Multiply by 2 in GF(2^8): shift left, then XOR with 8'h1b if the top bit was set +function automatic logic [7:0] xtime2(logic [7:0] a); + logic [7:0] x2; + x2 = {a[6:0],1'b0} ^ (a[7] ? 8'h1b : 8'b0 ); + return x2; +endfunction + +// Pared-down multiply by a 4-bit constant in GF(2^8) +function automatic logic [7:0] xtimeN(logic [7:0] a, logic [3:0] b); + logic [7:0] xn; + xn = (b[0] ? a : 0) ^ + (b[1] ? xtime2( a) : 0) ^ + (b[2] ? xtime2(xtime2( a)) : 0) ^ + (b[3] ? xtime2(xtime2(xtime2(a))): 0) ; + return xn; +endfunction + + logic zkb_val; + logic [31:0] zkb_result; + if (RV32Zk != RV32ZkNone) begin : gen_zkb + logic ror_sel, rol_sel, rori_sel, andn_sel, orn_sel, xnor_sel; + logic pack_sel, packh_sel, brev8_sel, rev8_sel, zip_sel, unzip_sel; + logic clmull_sel, clmulh_sel, xperm8_sel, xperm4_sel; + assign ror_sel = (operator_i == ZKB_ROR); + assign rol_sel = (operator_i == ZKB_ROL); + assign rori_sel = (operator_i == ZKB_RORI); + assign andn_sel = (operator_i == ZKB_ANDN); + assign orn_sel = (operator_i == ZKB_ORN); + assign xnor_sel = (operator_i == ZKB_XNOR); + assign pack_sel = (operator_i == ZKB_PACK); + assign packh_sel = (operator_i == ZKB_PACKH); + assign brev8_sel = (operator_i == ZKB_BREV8); + assign rev8_sel = (operator_i == ZKB_REV8); + assign zip_sel = (operator_i == ZKB_ZIP); + assign unzip_sel = (operator_i == ZKB_UNZIP); + assign clmull_sel = (operator_i == ZKB_CLMUL ); + assign clmulh_sel = (operator_i == ZKB_CLMULH); + assign xperm8_sel = (operator_i == ZKB_XPERM8); + assign xperm4_sel = (operator_i == ZKB_XPERM4); + + logic [ 4:0] shamt; + assign shamt = operand_b_i[4:0]; + + logic [31:0] wror, wrol, wandn, worn, wxnor, wpack, wpackh; + assign wror = ror32(operand_a_i, shamt); + assign wrol =
rol32(operand_a_i, shamt); + assign wandn = operand_a_i & (~operand_b_i); + assign worn = operand_a_i | (~operand_b_i); + assign wxnor = operand_a_i ^ (~operand_b_i); + assign wpack = { operand_b_i[15:0], operand_a_i[15:0]}; + assign wpackh = {16'd0, operand_b_i[ 7:0], operand_a_i[ 7:0]}; + + logic [ 7:0] rs1_b0, rs1_b1, rs1_b2, rs1_b3; + assign rs1_b0 = operand_a_i[ 7: 0]; + assign rs1_b1 = operand_a_i[15: 8]; + assign rs1_b2 = operand_a_i[23:16]; + assign rs1_b3 = operand_a_i[31:24]; + + logic [ 7:0] brev8_0, brev8_1, brev8_2, brev8_3; + assign brev8_0 = rev8(rs1_b0); + assign brev8_1 = rev8(rs1_b1); + assign brev8_2 = rev8(rs1_b2); + assign brev8_3 = rev8(rs1_b3); + + logic [31:0] wbrev8, wrev8; + assign wbrev8 = {brev8_3, brev8_2, brev8_1, brev8_0}; + assign wrev8 = {rs1_b0, rs1_b1, rs1_b2, rs1_b3}; + + logic [31:0] wzip, wunzip; + assign wzip = zip32( operand_a_i); + assign wunzip = unzip32(operand_a_i); + + // Xperm instructions + // indexable access 4-bit LUT. + logic [ 3:0] lut_4b [8]; + logic [31:0] wxperm4; + for(genvar i = 0; i < 8; i = i + 1) begin : gen_lut_xperm4 + // generate table. + assign lut_4b[i] = operand_a_i[4*i+:4]; + + logic [2:0] lut_8idx; + assign lut_8idx = operand_b_i[4*i+:3]; + + logic [3:0] lut4_out; + assign lut4_out = lut_4b[lut_8idx]; + assign wxperm4[i*4+:4] = operand_b_i[4*i+3] ? 4'b0000 : lut4_out; + end + + // indexable access 8-bit LUT. + logic [ 7:0] lut_8b [4]; + logic [31:0] wxperm8; + for(genvar i = 0; i < 4; i = i + 1) begin : gen_lut_xperm8 + // generate table. + assign lut_8b[i] = operand_a_i[8*i+:8]; + + logic [1:0] lut_4idx; + assign lut_4idx = operand_b_i[8*i+:2]; + + logic [7:0] lut8_out; + assign lut8_out = lut_8b[lut_4idx]; + assign wxperm8[i*8+:8] = |{operand_b_i[8*i+7:8*i+2]} ? 8'd0 : lut8_out; + end + + // clmul instructions + logic [15:0] lhs0, rhs0, lhs1, rhs1, lhs2, rhs2; + assign lhs0 = clmulh_sel? operand_a_i[31:16] : operand_a_i[15: 0]; + assign rhs0 = clmulh_sel? 
operand_b_i[31:16] : operand_b_i[15: 0]; + + assign lhs1 = operand_a_i[15: 0]; + assign rhs1 = operand_b_i[31:16]; + + assign lhs2 = operand_a_i[31:16]; + assign rhs2 = operand_b_i[15: 0]; + + /* Three ibex_poly16_mul instances (module defined elsewhere; presumably a 16x16 carry-less multiply, confirm). ins0 takes lhs0/rhs0, which are assigned before this point; ins1 pairs operand_a_i[15:0] with operand_b_i[31:16]; ins2 pairs operand_a_i[31:16] with operand_b_i[15:0]. */ logic [31:0] polymul0, polymul1, polymul2; + ibex_poly16_mul mul16_ins0(.a(lhs0), .b(rhs0), .r(polymul0)); + ibex_poly16_mul mul16_ins1(.a(lhs1), .b(rhs1), .r(polymul1)); + ibex_poly16_mul mul16_ins2(.a(lhs2), .b(rhs2), .r(polymul2)); + + /* clmulm is the XOR of the two cross products. Its upper half is XORed into the lower half of wclmulh and its lower half into the upper half of wclmull; the remaining halves come straight from polymul0. */ logic [31:0] wclmull, wclmulh, clmulm; + assign clmulm = polymul1 ^ polymul2; + assign wclmulh = {polymul0[31:16], (polymul0[15: 0] ^ clmulm[31:16]) }; + assign wclmull = { (polymul0[31:16] ^ clmulm[15: 0]), polymul0[15: 0]}; + + /* zkb_val is the OR-reduction of all Zkb select signals; zkb_result is an AND-OR mux of the per-operator results, so it is zero when no select is active. rori_sel reuses wror. */ assign zkb_val = |{ror_sel, rol_sel, rori_sel, andn_sel, orn_sel, xnor_sel, + pack_sel, packh_sel, brev8_sel, rev8_sel, zip_sel, unzip_sel, + clmull_sel, clmulh_sel, xperm8_sel, xperm4_sel}; + assign zkb_result = {32{ ror_sel}} & wror | + {32{ rol_sel}} & wrol | + {32{ rori_sel}} & wror | + {32{ andn_sel}} & wandn | + {32{ orn_sel}} & worn | + {32{ xnor_sel}} & wxnor | + {32{ pack_sel}} & wpack | + {32{ packh_sel}} & wpackh | + {32{ brev8_sel}} & wbrev8 | + {32{ rev8_sel}} & wrev8 | + {32{ zip_sel}} & wzip | + {32{ unzip_sel}} & wunzip | + {32{clmull_sel}} & wclmull | + {32{clmulh_sel}} & wclmulh | + {32{xperm8_sel}} & wxperm8 | + {32{xperm4_sel}} & wxperm4 ; + end else begin : gen_no_zkb + assign zkb_val = 1'b0; + assign zkb_result = 32'd0; + end + + logic zkn_val; + logic [31:0] zkn_result; + + /* Zkn datapath (AES32 and SHA2 operators): generated only when RV32Zk == RV32Zkn; the else branch ties zkn_val and zkn_result to zero. */ if (RV32Zk == RV32Zkn) begin : gen_zkn + logic bs0, bs1, bs2, bs3; //byte select in aes instructions + assign bs0 = (operator_i == ZKN_AES32DSB0) || (operator_i == ZKN_AES32DSMB0) || + (operator_i == ZKN_AES32ESB0) || (operator_i == ZKN_AES32ESMB0) ; + assign bs1 = (operator_i == ZKN_AES32DSB1) || (operator_i == ZKN_AES32DSMB1) || + (operator_i == ZKN_AES32ESB1) || (operator_i == ZKN_AES32ESMB1) ; + assign bs2 = (operator_i == ZKN_AES32DSB2) || (operator_i == ZKN_AES32DSMB2) || + (operator_i == ZKN_AES32ESB2) || (operator_i 
== ZKN_AES32ESMB2) ; + assign bs3 = (operator_i == ZKN_AES32DSB3) || (operator_i == ZKN_AES32DSMB3) || + (operator_i == ZKN_AES32ESB3) || (operator_i == ZKN_AES32ESMB3) ; + + logic decs_sel, encs_sel, decsm_sel, encsm_sel; //operation select in aes instructions + assign decs_sel = (operator_i == ZKN_AES32DSB0) || (operator_i == ZKN_AES32DSB1) || + (operator_i == ZKN_AES32DSB2) || (operator_i == ZKN_AES32DSB3) ; + assign encs_sel = (operator_i == ZKN_AES32ESB0) || (operator_i == ZKN_AES32ESB1) || + (operator_i == ZKN_AES32ESB2) || (operator_i == ZKN_AES32ESB3) ; + assign decsm_sel = (operator_i == ZKN_AES32DSMB0) || (operator_i == ZKN_AES32DSMB1) || + (operator_i == ZKN_AES32DSMB2) || (operator_i == ZKN_AES32DSMB3) ; + assign encsm_sel = (operator_i == ZKN_AES32ESMB0) || (operator_i == ZKN_AES32ESMB1) || + (operator_i == ZKN_AES32ESMB2) || (operator_i == ZKN_AES32ESMB3) ; + logic aes32_sel; + assign aes32_sel = decs_sel || encs_sel || encsm_sel || decsm_sel; + + /* sel_byte is the byte of operand_b_i selected by bs0..bs3 (zero if none is set). */ logic [7:0] sel_byte; + assign sel_byte = {8{bs0}} & operand_b_i[ 7: 0] | + {8{bs1}} & operand_b_i[15: 8] | + {8{bs2}} & operand_b_i[23:16] | + {8{bs3}} & operand_b_i[31:24] ; + + /* dec is set for the AES32DS and AES32DSM operators; mix is set for the AES32ESM and AES32DSM operators. */ logic dec, mix; + assign dec = decs_sel || decsm_sel ; + assign mix = encsm_sel || decsm_sel ; + + logic [7:0] sbox_out; + // SBOX instances + ibex_aes_sbox i_aes_sbox( + .fw (~dec ), + .in (sel_byte ), + .fx (sbox_out) + ); + + /* xtimeN (function defined elsewhere) is applied with constants 11, 13, 9, 14 for mix_b3..mix_b0 when dec is set; otherwise mix_b3 and mix_b0 use 3 and 2 while mix_b2 and mix_b1 pass sbox_out through unchanged. */ logic [7:0] mix_b0, mix_b1, mix_b2, mix_b3; + assign mix_b3 = xtimeN(sbox_out, (dec ? 11 : 3)) ; + assign mix_b2 = dec ? xtimeN(sbox_out, ( 13)) : sbox_out ; + assign mix_b1 = dec ? xtimeN(sbox_out, ( 9)) : sbox_out ; + assign mix_b0 = xtimeN(sbox_out, (dec ? 14 : 2)) ; + + logic [31:0] mixed, sbox_mix, rotated; + assign mixed = {mix_b3, mix_b2, mix_b1, mix_b0}; + assign sbox_mix = mix ? 
mixed : {24'b0, sbox_out}; + /* sbox_mix is the four-byte mixed word when mix is set, else sbox_out zero-extended; rotated is sbox_mix rotated left by 0, 8, 16 or 24 bits for bs0..bs3. */ assign rotated = {32{bs0}} & {sbox_mix } | + {32{bs1}} & {sbox_mix[23:0], sbox_mix[31:24] } | + {32{bs2}} & {sbox_mix[15:0], sbox_mix[31:16] } | + {32{bs3}} & {sbox_mix[ 7:0], sbox_mix[31: 8] } ; + + // sha2 instructions + logic sha256_sum0_sel, sha256_sum1_sel, sha256_sig0_sel, sha256_sig1_sel; + assign sha256_sum0_sel = (operator_i == ZKN_SHA256SUM0); + assign sha256_sum1_sel = (operator_i == ZKN_SHA256SUM1); + assign sha256_sig0_sel = (operator_i == ZKN_SHA256SIG0); + assign sha256_sig1_sel = (operator_i == ZKN_SHA256SIG1); + + logic sha512_sum0r_sel, sha512_sum1r_sel; + logic sha512_sig0l_sel, sha512_sig1l_sel; + logic sha512_sig0h_sel, sha512_sig1h_sel; + assign sha512_sum0r_sel = (operator_i == ZKN_SHA512SUM0R); + assign sha512_sum1r_sel = (operator_i == ZKN_SHA512SUM1R); + assign sha512_sig0l_sel = (operator_i == ZKN_SHA512SIG0L); + assign sha512_sig0h_sel = (operator_i == ZKN_SHA512SIG0H); + assign sha512_sig1l_sel = (operator_i == ZKN_SHA512SIG1L); + assign sha512_sig1h_sel = (operator_i == ZKN_SHA512SIG1H); + + + /* The SHA-256 results use operand_a_i only. RORI32, SRLI32 and SLLI32 are macros defined earlier in this file (they are undefined again before endmodule). */ logic[31:0] sha256_sum0, sha256_sum1, sha256_sig0, sha256_sig1; + assign sha256_sig0 = `RORI32(operand_a_i, 7) ^ `RORI32(operand_a_i,18) ^ + `SRLI32(operand_a_i, 3); + assign sha256_sig1 = `RORI32(operand_a_i,17) ^ `RORI32(operand_a_i,19) ^ + `SRLI32(operand_a_i,10); + assign sha256_sum0 = `RORI32(operand_a_i, 2) ^ `RORI32(operand_a_i,13) ^ + `RORI32(operand_a_i,22); + assign sha256_sum1 = `RORI32(operand_a_i, 6) ^ `RORI32(operand_a_i,11) ^ + `RORI32(operand_a_i,25); + + /* The SHA-512 results XOR shifted copies of both operand_a_i and operand_b_i. The sig0h and sig1h forms omit one operand_b_i shift term that sig0l and sig1l include (shift by 25 and 26 respectively). */ logic[31:0] sha512_sum0r, sha512_sum1r; + logic[31:0] sha512_sig0l, sha512_sig1l; + logic[31:0] sha512_sig0h, sha512_sig1h; + assign sha512_sum0r = `SLLI32(operand_a_i,25)^`SLLI32(operand_a_i,30)^`SRLI32(operand_a_i,28)^ + `SRLI32(operand_b_i, 7)^`SRLI32(operand_b_i, 2)^`SLLI32(operand_b_i, 4); + assign sha512_sum1r = `SLLI32(operand_a_i,23)^`SRLI32(operand_a_i,14)^`SRLI32(operand_a_i,18)^ + `SRLI32(operand_b_i, 
9)^`SLLI32(operand_b_i,18)^`SLLI32(operand_b_i,14); + assign sha512_sig0l = `SRLI32(operand_a_i, 1)^`SRLI32(operand_a_i, 7)^`SRLI32(operand_a_i, 8)^ + `SLLI32(operand_b_i,31)^`SLLI32(operand_b_i,25)^`SLLI32(operand_b_i,24); + assign sha512_sig0h = `SRLI32(operand_a_i, 1)^`SRLI32(operand_a_i, 7)^`SRLI32(operand_a_i, 8)^ + `SLLI32(operand_b_i,31) ^`SLLI32(operand_b_i,24); + assign sha512_sig1l = `SLLI32(operand_a_i, 3)^`SRLI32(operand_a_i, 6)^`SRLI32(operand_a_i,19)^ + `SRLI32(operand_b_i,29)^`SLLI32(operand_b_i,26)^`SLLI32(operand_b_i,13); + assign sha512_sig1h = `SLLI32(operand_a_i, 3)^`SRLI32(operand_a_i, 6)^`SRLI32(operand_a_i,19)^ + `SRLI32(operand_b_i,29) ^`SLLI32(operand_b_i,13); + + /* zkn_val is the OR-reduction of the Zkn selects; zkn_result is an AND-OR mux in which the AES32 result is rotated XOR operand_a_i. */ assign zkn_val = |{sha256_sum0_sel, sha256_sum1_sel, sha256_sig0_sel, sha256_sig1_sel, + sha512_sum0r_sel, sha512_sum1r_sel, + sha512_sig0l_sel, sha512_sig1l_sel, + sha512_sig0h_sel, sha512_sig1h_sel, aes32_sel}; + assign zkn_result = {32{aes32_sel }} & (rotated ^ operand_a_i) | + {32{sha256_sig0_sel }} & sha256_sig0 | + {32{sha256_sig1_sel }} & sha256_sig1 | + {32{sha256_sum0_sel }} & sha256_sum0 | + {32{sha256_sum1_sel }} & sha256_sum1 | + {32{sha512_sum0r_sel}} & sha512_sum0r | + {32{sha512_sum1r_sel}} & sha512_sum1r | + {32{sha512_sig0l_sel}} & sha512_sig0l | + {32{sha512_sig0h_sel}} & sha512_sig0h | + {32{sha512_sig1l_sel}} & sha512_sig1l | + {32{sha512_sig1h_sel}} & sha512_sig1h ; + end else begin : gen_no_zkn + assign zkn_val = 1'b0; + assign zkn_result = 32'd0; + end + + logic zks_val; + logic [31:0] zks_result; + /* Zks datapath (SM4 and SM3 operators): generated only when RV32Zk == RV32Zks; the else branch ties zks_val and zks_result to zero. */ if (RV32Zk == RV32Zks) begin : gen_zks + logic sm4ed_sel, sm4ks_sel, sm3p0_sel, sm3p1_sel; + assign sm4ed_sel = (operator_i == ZKS_SM4EDB0) || (operator_i == ZKS_SM4EDB2) || + (operator_i == ZKS_SM4EDB1) || (operator_i == ZKS_SM4EDB3) ; + assign sm4ks_sel = (operator_i == ZKS_SM4KSB0) || (operator_i == ZKS_SM4KSB2) || + (operator_i == ZKS_SM4KSB1) || (operator_i == ZKS_SM4KSB3) ; + assign sm3p0_sel = (operator_i == ZKS_SM3P0); + assign sm3p1_sel = (operator_i == 
ZKS_SM3P1); + + logic zks_bs0, zks_bs1, zks_bs2, zks_bs3; //byte select in sm4 instructions + assign zks_bs0 = (operator_i == ZKS_SM4EDB0) || (operator_i == ZKS_SM4KSB0) ; + assign zks_bs1 = (operator_i == ZKS_SM4EDB1) || (operator_i == ZKS_SM4KSB1) ; + assign zks_bs2 = (operator_i == ZKS_SM4EDB2) || (operator_i == ZKS_SM4KSB2) ; + assign zks_bs3 = (operator_i == ZKS_SM4EDB3) || (operator_i == ZKS_SM4KSB3) ; + logic [7:0] sbox_in; + assign sbox_in = {8{zks_bs0}} & operand_b_i[ 7: 0] | + {8{zks_bs1}} & operand_b_i[15: 8] | + {8{zks_bs2}} & operand_b_i[23:16] | + {8{zks_bs3}} & operand_b_i[31:24] ; + logic [ 7:0] sm4_sbox_out; + // Submodule - SBox + ibex_sm4_sbox ism4_sbox ( + .in (sbox_in), + .fx (sm4_sbox_out) + ); + + /* s is sm4_sbox_out zero-extended to 32 bits. */ logic [31:0] s; + assign s = {24'b0, sm4_sbox_out}; + + // ED Instruction + logic [31:0] ed1, ed2; + assign ed1 = s ^ (s << 8) ^ (s << 2) ^ (s << 18); + assign ed2 = ed1 ^ ((s & 32'h3F) << 26) ^ ((s & 32'hC0) << 10); + + // KS Instruction + logic [31:0] ks1, ks2; + assign ks1 = s ^ ((s & 32'h07) << 29) ^ ((s & 32'hFE) << 7); + assign ks2 = ks1 ^ ((s & 32'h01) << 23) ^ ((s & 32'hF8) << 13); + + // Rotate and XOR result + logic [31:0] rot_in, rot_out, sm4; + assign rot_in = sm4ks_sel ? 
ks2 : ed2; + /* rot_out is rot_in rotated left by 0, 8, 16 or 24 bits for zks_bs0..zks_bs3; sm4 XORs it with operand_a_i and is shared by the ED and KS operators. */ assign rot_out = {32{zks_bs0}} & {rot_in } | + {32{zks_bs1}} & {rot_in[23:0], rot_in[31:24] } | + {32{zks_bs2}} & {rot_in[15:0], rot_in[31:16] } | + {32{zks_bs3}} & {rot_in[ 7:0], rot_in[31: 8] } ; + assign sm4 = rot_out ^ operand_a_i ; + + logic [31:0] sm3_p0, sm3_p1; + assign sm3_p0 = operand_a_i ^ `ROLI32(operand_a_i, 9) ^ `ROLI32(operand_a_i,17); + assign sm3_p1 = operand_a_i ^ `ROLI32(operand_a_i, 15) ^ `ROLI32(operand_a_i,23); + + assign zks_val =|{sm4ed_sel, sm4ks_sel, sm3p0_sel, sm3p1_sel}; + assign zks_result = {32{sm4ed_sel}} & sm4 | + {32{sm4ks_sel}} & sm4 | + {32{sm3p0_sel}} & sm3_p0 | + {32{sm3p1_sel}} & sm3_p1 ; + end else begin : gen_no_zks + assign zks_val = 1'b0; + assign zks_result = 32'd0; + end + + /* Each datapath drives zero when none of its operators is selected or when it is not generated, so the three results can simply be ORed together. */ assign zk_val_o = zkb_val || zkn_val || zks_val; + assign result_o = zkb_result | zkn_result | zks_result; + +`undef RORI32 +`undef ROLI32 +`undef SRLI32 +`undef SLLI32 + +endmodule