From 268bdaa2ae0d3de1a7b099f9fbd5d8080ad2a82c Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Tue, 23 Nov 2021 14:40:04 +0000
Subject: [PATCH 01/21] Zk: add support for the zkh extension

---
 ibex_core.core       |   1 +
 rtl/ibex_core.sv     |   4 ++
 rtl/ibex_decoder.sv  | 151 ++++++++++++++++++++++++-------------------
 rtl/ibex_ex_block.sv |  27 ++++++--
 rtl/ibex_id_stage.sv |  18 +++---
 rtl/ibex_lockstep.sv |   2 +
 rtl/ibex_pkg.sv      |  24 ++++++-
 rtl/ibex_top.sv      |   3 +
 8 files changed, 147 insertions(+), 83 deletions(-)

diff --git a/ibex_core.core b/ibex_core.core
index a77ba5999a..0a21432a92 100644
--- a/ibex_core.core
+++ b/ibex_core.core
@@ -34,6 +34,7 @@ filesets:
       - rtl/ibex_pmp.sv
       - rtl/ibex_wb_stage.sv
       - rtl/ibex_dummy_instr.sv
+      - rtl/ibex_zk.sv
       - rtl/ibex_core.sv
     file_type: systemVerilogSource
 
diff --git a/rtl/ibex_core.sv b/rtl/ibex_core.sv
index 13dc144e63..4a472c5cad 100644
--- a/rtl/ibex_core.sv
+++ b/rtl/ibex_core.sv
@@ -21,6 +21,7 @@ module ibex_core import ibex_pkg::*; #(
   parameter bit          RV32E             = 1'b0,
   parameter rv32m_e      RV32M             = RV32MFast,
   parameter rv32b_e      RV32B             = RV32BNone,
+  parameter rv32zk_e     RV32Zk            = RV32ZkNone,
   parameter bit          BranchTargetALU   = 1'b0,
   parameter bit          WritebackStage    = 1'b0,
   parameter bit          ICache            = 1'b0,
@@ -468,6 +469,7 @@ module ibex_core import ibex_pkg::*; #(
     .RV32E          (RV32E),
     .RV32M          (RV32M),
     .RV32B          (RV32B),
+    .RV32Zk         (RV32Zk),
     .BranchTargetALU(BranchTargetALU),
     .DataIndTiming  (DataIndTiming),
     .SpecBranch     (SpecBranch),
@@ -627,6 +629,7 @@ module ibex_core import ibex_pkg::*; #(
   ibex_ex_block #(
     .RV32M          (RV32M),
     .RV32B          (RV32B),
+    .RV32Zk         (RV32Zk),
     .BranchTargetALU(BranchTargetALU)
   ) ex_block_i (
     .clk_i (clk_i),
@@ -911,6 +914,7 @@ module ibex_core import ibex_pkg::*; #(
     .RV32E            (RV32E),
     .RV32M            (RV32M),
     .RV32B            (RV32B)
+//    .RV32Zk           (RV32Zk)
   ) cs_registers_i (
     .clk_i (clk_i),
     .rst_ni(rst_ni),
diff --git a/rtl/ibex_decoder.sv b/rtl/ibex_decoder.sv
index 716b301556..be83356b76 100644
--- a/rtl/ibex_decoder.sv
+++ b/rtl/ibex_decoder.sv
@@ -14,10 +14,11 @@
 `include "prim_assert.sv"
 
 module ibex_decoder #(
-  parameter bit RV32E               = 0,
-  parameter ibex_pkg::rv32m_e RV32M = ibex_pkg::RV32MFast,
-  parameter ibex_pkg::rv32b_e RV32B = ibex_pkg::RV32BNone,
-  parameter bit BranchTargetALU     = 0
+  parameter bit RV32E                 = 0,
+  parameter ibex_pkg::rv32m_e  RV32M  = ibex_pkg::RV32MFast,
+  parameter ibex_pkg::rv32b_e  RV32B  = ibex_pkg::RV32BNone,
+  parameter ibex_pkg::rv32zk_e RV32Zk = ibex_pkg::RV32ZkNone,
+  parameter bit BranchTargetALU       = 0
 ) (
   input  logic                 clk_i,
   input  logic                 rst_ni,
@@ -384,7 +385,15 @@ module ibex_decoder #(
                   7'b001_1000,                                                         // crc32c.b
                   7'b001_1001,                                                         // crc32c.h
                   7'b001_1010: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1;      // crc32c.w
-
+                  default: illegal_insn = 1'b1;
+                endcase
+              end
+              5'b0_0010: begin
+                unique case(instr[26:20])
+                  7'b000_0000,                                                         // sha256sum0
+                  7'b000_0001,                                                         // sha256sum1
+                  7'b000_0010,                                                         // sha256sig0
+                  7'b000_0011: illegal_insn = (RV32Zk == RV32Zkn) ? 1'b0 : 1'b1;       // sha256sig1
                   default: illegal_insn = 1'b1;
                 endcase
               end
@@ -501,6 +510,14 @@ module ibex_decoder #(
             {7'b000_0101, 3'b010}, // clmulr
             {7'b000_0101, 3'b011}: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // clmulh
 
+            // RV32Zk zkh
+            {7'b010_1000, 3'b000}, // sha512_sum0r
+            {7'b010_1001, 3'b000}, // sha512_sum1r
+            {7'b010_1010, 3'b000}, // sha512_sig0l
+            {7'b010_1011, 3'b000}, // sha512_sig1l
+            {7'b010_1110, 3'b000}, // sha512_sig0h
+            {7'b010_1111, 3'b000}: illegal_insn = (RV32Zk == RV32Zkn) ? 1'b0 : 1'b1; // sha512_sig1h
+
             // RV32M instructions
             {7'b000_0001, 3'b000}: begin // mul
               multdiv_operator_o    = MD_OP_MULL;
@@ -820,67 +837,59 @@ module ibex_decoder #(
           3'b111: alu_operator_o = ALU_AND;  // And with Immediate
 
           3'b001: begin
-            if (RV32B != RV32BNone) begin
-              unique case (instr_alu[31:27])
-                5'b0_0000: alu_operator_o = ALU_SLL;    // Shift Left Logical by Immediate
-                5'b0_0100: alu_operator_o = ALU_SLO;    // Shift Left Ones by Immediate
-                5'b0_1001: alu_operator_o = ALU_SBCLR;  // Clear bit specified by immediate
-                5'b0_0101: alu_operator_o = ALU_SBSET;  // Set bit specified by immediate
-                5'b0_1101: alu_operator_o = ALU_SBINV;  // Invert bit specified by immediate.
-                // Shuffle with Immediate Control Value
-                5'b0_0001: if (instr_alu[26] == 0) alu_operator_o = ALU_SHFL;
-                5'b0_1100: begin
-                  unique case (instr_alu[26:20])
-                    7'b000_0000: alu_operator_o = ALU_CLZ;   // clz
-                    7'b000_0001: alu_operator_o = ALU_CTZ;   // ctz
-                    7'b000_0010: alu_operator_o = ALU_PCNT;  // pcnt
-                    7'b000_0100: alu_operator_o = ALU_SEXTB; // sext.b
-                    7'b000_0101: alu_operator_o = ALU_SEXTH; // sext.h
-                    7'b001_0000: begin
-                      if (RV32B == RV32BFull) begin
-                        alu_operator_o = ALU_CRC32_B;  // crc32.b
-                        alu_multicycle_o = 1'b1;
-                      end
-                    end
-                    7'b001_0001: begin
-                      if (RV32B == RV32BFull) begin
-                        alu_operator_o = ALU_CRC32_H;  // crc32.h
-                        alu_multicycle_o = 1'b1;
-                      end
-                    end
-                    7'b001_0010: begin
-                      if (RV32B == RV32BFull) begin
-                        alu_operator_o = ALU_CRC32_W;  // crc32.w
-                        alu_multicycle_o = 1'b1;
-                      end
-                    end
-                    7'b001_1000: begin
-                      if (RV32B == RV32BFull) begin
-                        alu_operator_o = ALU_CRC32C_B; // crc32c.b
-                        alu_multicycle_o = 1'b1;
-                      end
-                    end
-                    7'b001_1001: begin
-                      if (RV32B == RV32BFull) begin
-                        alu_operator_o = ALU_CRC32C_H; // crc32c.h
-                        alu_multicycle_o = 1'b1;
-                      end
-                    end
-                    7'b001_1010: begin
-                      if (RV32B == RV32BFull) begin
-                        alu_operator_o = ALU_CRC32C_W; // crc32c.w
-                        alu_multicycle_o = 1'b1;
-                      end
-                    end
-                    default: ;
-                  endcase
-                end
-
-                default: ;
-              endcase
-            end else begin
-              alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate
-            end
+            unique case (instr_alu[31:27])
+              5'b0_0000: alu_operator_o = ALU_SLL;    // Shift Left Logical by Immediate
+              5'b0_0100: if (RV32B != RV32BNone) alu_operator_o = ALU_SLO  ;  // Shift Left Ones by Immediate
+              5'b0_1001: if (RV32B != RV32BNone) alu_operator_o = ALU_SBCLR;  // Clear bit specified by immediate
+              5'b0_0101: if (RV32B != RV32BNone) alu_operator_o = ALU_SBSET;  // Set bit specified by immediate
+              5'b0_1101: if (RV32B != RV32BNone) alu_operator_o = ALU_SBINV;  // Invert bit specified by immediate.
+              // Shuffle with Immediate Control Value
+              5'b0_0001: if (instr_alu[26] == 0) alu_operator_o = (RV32B == RV32BFull) ? ALU_SHFL : ALU_SLL;
+              5'b0_1100: begin
+                unique case (instr_alu[26:20])
+                  7'b000_0000: if (RV32B != RV32BNone) alu_operator_o = ALU_CLZ  ; // clz
+                  7'b000_0001: if (RV32B != RV32BNone) alu_operator_o = ALU_CTZ  ; // ctz
+                  7'b000_0010: if (RV32B != RV32BNone) alu_operator_o = ALU_PCNT ; // pcnt
+                  7'b000_0100: if (RV32B != RV32BNone) alu_operator_o = ALU_SEXTB; // sext.b
+                  7'b000_0101: if (RV32B != RV32BNone) alu_operator_o = ALU_SEXTH; // sext.h
+                  7'b001_0000: if (RV32B == RV32BFull) begin
+                    alu_operator_o   = ALU_CRC32_B;  // crc32.b
+                    alu_multicycle_o = 1'b1;
+                  end
+                  7'b001_0001: if (RV32B == RV32BFull) begin
+                    alu_operator_o   = ALU_CRC32_H;  // crc32.h
+                    alu_multicycle_o = 1'b1;
+                  end
+                  7'b001_0010: if (RV32B == RV32BFull) begin
+                    alu_operator_o   = ALU_CRC32_W;  // crc32.w
+                    alu_multicycle_o = 1'b1;
+                  end
+                  7'b001_1000: if (RV32B == RV32BFull) begin
+                    alu_operator_o   = ALU_CRC32C_B; // crc32c.b
+                    alu_multicycle_o = 1'b1;
+                  end
+                  7'b001_1001: if (RV32B == RV32BFull) begin
+                    alu_operator_o   = ALU_CRC32C_H; // crc32c.h
+                    alu_multicycle_o = 1'b1;
+                  end
+                  7'b001_1010: if (RV32B == RV32BFull) begin
+                    alu_operator_o   = ALU_CRC32C_W; // crc32c.w
+                    alu_multicycle_o = 1'b1;
+                  end
+                  default: ;
+                endcase
+              end
+              5'b0_0010: begin                                                         // Zk, zkh
+                unique case (instr_alu[26:20])
+                  7'b000_0000: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SUM0; // sha256sum0
+                  7'b000_0001: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SUM1; // sha256sum1
+                  7'b000_0010: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SIG0; // sha256sig0
+                  7'b000_0011: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SIG1; // sha256sig1
+                  default:     alu_operator_o = ALU_SLL;
+                endcase
+              end
+              default: alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate
+            endcase
           end
 
           3'b101: begin
@@ -932,7 +941,7 @@ module ibex_decoder #(
         alu_op_a_mux_sel_o = OP_A_REG_A;
         alu_op_b_mux_sel_o = OP_B_REG_B;
 
-        if (instr_alu[26]) begin
+        if ({instr_alu[26], instr_alu[13:12]} == {1'b1, 2'b01}) begin // cmix / cmov / fsl / fsr
           if (RV32B != RV32BNone) begin
             unique case ({instr_alu[26:25], instr_alu[14:12]})
               {2'b11, 3'b001}: begin
@@ -1056,6 +1065,14 @@ module ibex_decoder #(
               end
             end
 
+            // RV32Zk zkh
+            {7'b010_1000, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SUM0R; // sha512_sum0r
+            {7'b010_1001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SUM1R; // sha512_sum1r
+            {7'b010_1010, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG0L; // sha512_sig0l
+            {7'b010_1011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG1L; // sha512_sig1l
+            {7'b010_1110, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG0H; // sha512_sig0h
+            {7'b010_1111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG1H; // sha512_sig1h
+
             // RV32M instructions, all use the same ALU operation
             {7'b000_0001, 3'b000}: begin // mul
               alu_operator_o = ALU_ADD;
diff --git a/rtl/ibex_ex_block.sv b/rtl/ibex_ex_block.sv
index ee900164b9..127cf3d5b2 100644
--- a/rtl/ibex_ex_block.sv
+++ b/rtl/ibex_ex_block.sv
@@ -9,9 +9,10 @@
  * Execution block: Hosts ALU and MUL/DIV unit
  */
 module ibex_ex_block #(
-  parameter ibex_pkg::rv32m_e RV32M           = ibex_pkg::RV32MFast,
-  parameter ibex_pkg::rv32b_e RV32B           = ibex_pkg::RV32BNone,
-  parameter bit               BranchTargetALU = 0
+  parameter ibex_pkg::rv32m_e  RV32M           = ibex_pkg::RV32MFast,
+  parameter ibex_pkg::rv32b_e  RV32B           = ibex_pkg::RV32BNone,
+  parameter ibex_pkg::rv32zk_e RV32Zk          = ibex_pkg::RV32ZkNone,
+  parameter bit                BranchTargetALU = 0
 ) (
   input  logic                  clk_i,
   input  logic                  rst_ni,
@@ -55,11 +56,12 @@ module ibex_ex_block #(
 
   import ibex_pkg::*;
 
-  logic [31:0] alu_result, multdiv_result;
+  logic [31:0] alu_result, zke_result, multdiv_result;
 
   logic [32:0] multdiv_alu_operand_b, multdiv_alu_operand_a;
   logic [33:0] alu_adder_result_ext;
   logic        alu_cmp_result, alu_is_equal_result;
+  logic        zke_val;
   logic        multdiv_valid;
   logic        multdiv_sel;
   logic [31:0] alu_imd_val_q[2];
@@ -86,7 +88,7 @@ module ibex_ex_block #(
 
   assign alu_imd_val_q = '{imd_val_q_i[0][31:0], imd_val_q_i[1][31:0]};
 
-  assign result_ex_o  = multdiv_sel ? multdiv_result : alu_result;
+  assign result_ex_o  = multdiv_sel ? multdiv_result : (zke_val ? zke_result : alu_result);
 
   // branch handling
   assign branch_decision_o  = alu_cmp_result;
@@ -133,6 +135,21 @@ module ibex_ex_block #(
     .is_equal_result_o  (alu_is_equal_result)
   );
 
+  //////////////////
+  // Zk Extension //
+  //////////////////
+  if (RV32Zk != RV32ZkNone) begin : gen_Zkn
+    ibex_zk #(.RV32Zk(RV32Zk)) zkn_i (
+      .operator_i (alu_operator_i),
+      .operand_a_i(alu_operand_a_i),
+      .operand_b_i(alu_operand_b_i),
+      .result_o   (zke_result),
+      .zk_val_o   (zke_val)
+    );
+  end else begin : gen_no_zk
+    assign {zke_val, zke_result} = '0; // no Zk unit: result mux never selects zke_result
+  end
+
   ////////////////
   // Multiplier //
   ////////////////
diff --git a/rtl/ibex_id_stage.sv b/rtl/ibex_id_stage.sv
index 1af8d326f1..c6f9a9a58f 100644
--- a/rtl/ibex_id_stage.sv
+++ b/rtl/ibex_id_stage.sv
@@ -18,14 +18,15 @@
 `include "dv_fcov_macros.svh"
 
 module ibex_id_stage #(
-  parameter bit               RV32E           = 0,
-  parameter ibex_pkg::rv32m_e RV32M           = ibex_pkg::RV32MFast,
-  parameter ibex_pkg::rv32b_e RV32B           = ibex_pkg::RV32BNone,
-  parameter bit               DataIndTiming   = 1'b0,
-  parameter bit               BranchTargetALU = 0,
-  parameter bit               SpecBranch      = 0,
-  parameter bit               WritebackStage  = 0,
-  parameter bit               BranchPredictor = 0
+  parameter bit                RV32E           = 0,
+  parameter ibex_pkg::rv32m_e  RV32M           = ibex_pkg::RV32MFast,
+  parameter ibex_pkg::rv32b_e  RV32B           = ibex_pkg::RV32BNone,
+  parameter ibex_pkg::rv32zk_e RV32Zk          = ibex_pkg::RV32ZkNone,
+  parameter bit                DataIndTiming   = 1'b0,
+  parameter bit                BranchTargetALU = 0,
+  parameter bit                SpecBranch      = 0,
+  parameter bit                WritebackStage  = 0,
+  parameter bit                BranchPredictor = 0
 ) (
   input  logic                      clk_i,
   input  logic                      rst_ni,
@@ -426,6 +427,7 @@ module ibex_id_stage #(
     .RV32E          (RV32E),
     .RV32M          (RV32M),
     .RV32B          (RV32B),
+    .RV32Zk         (RV32Zk),
     .BranchTargetALU(BranchTargetALU)
   ) decoder_i (
     .clk_i (clk_i),
diff --git a/rtl/ibex_lockstep.sv b/rtl/ibex_lockstep.sv
index c42ef7c8dc..28bb2aa8c2 100644
--- a/rtl/ibex_lockstep.sv
+++ b/rtl/ibex_lockstep.sv
@@ -16,6 +16,7 @@ module ibex_lockstep import ibex_pkg::*; #(
   parameter bit          RV32E             = 1'b0,
   parameter rv32m_e      RV32M             = RV32MFast,
   parameter rv32b_e      RV32B             = RV32BNone,
+  parameter rv32zk_e     RV32Zk            = RV32ZkNone,
   parameter bit          BranchTargetALU   = 1'b0,
   parameter bit          WritebackStage    = 1'b0,
   parameter bit          ICache            = 1'b0,
@@ -323,6 +324,7 @@ module ibex_lockstep import ibex_pkg::*; #(
     .RV32E             ( RV32E             ),
     .RV32M             ( RV32M             ),
     .RV32B             ( RV32B             ),
+    .RV32Zk            ( RV32Zk            ),
     .BranchTargetALU   ( BranchTargetALU   ),
     .ICache            ( ICache            ),
     .ICacheECC         ( ICacheECC         ),
diff --git a/rtl/ibex_pkg.sv b/rtl/ibex_pkg.sv
index 5875899a80..69df1e28ea 100644
--- a/rtl/ibex_pkg.sv
+++ b/rtl/ibex_pkg.sv
@@ -50,6 +50,13 @@ package ibex_pkg;
     RV32BFull     = 2
   } rv32b_e;
 
+  typedef enum integer { // selects the Zk support built into the core (parameter RV32Zk)
+      RV32ZkNone     = 0, // default; ibex_ex_block skips ibex_zk for this value
+      RV32Zbkb       = 1, // NOTE(review): unused in the visible RTL - confirm intended meaning
+      RV32Zkn        = 2, // value for which the decoder accepts the Zk instructions
+      RV32Zks        = 3  // NOTE(review): unused in the visible RTL - confirm intended meaning
+    } rv32zk_e;
+
   /////////////
   // Opcodes //
   /////////////
@@ -73,7 +80,7 @@ package ibex_pkg;
   // ALU operations //
   ////////////////////
 
-  typedef enum logic [5:0] {
+  typedef enum logic [6:0] {
     // Arithmetics
     ALU_ADD,
     ALU_SUB,
@@ -176,7 +183,19 @@ package ibex_pkg;
     ALU_CRC32_H,
     ALU_CRC32C_H,
     ALU_CRC32_W,
-    ALU_CRC32C_W
+    ALU_CRC32C_W,
+
+    // Zkn/Zks
+    ZKN_SHA256SUM0,
+    ZKN_SHA256SUM1,
+    ZKN_SHA256SIG0,
+    ZKN_SHA256SIG1,
+    ZKN_SHA512SUM0R,
+    ZKN_SHA512SUM1R,
+    ZKN_SHA512SIG0L,
+    ZKN_SHA512SIG1L,
+    ZKN_SHA512SIG0H,
+    ZKN_SHA512SIG1H
   } alu_op_e;
 
   typedef enum logic [1:0] {
@@ -187,7 +206,6 @@ package ibex_pkg;
     MD_OP_REM
   } md_op_e;
 
-
   //////////////////////////////////
   // Control and status registers //
   //////////////////////////////////
diff --git a/rtl/ibex_top.sv b/rtl/ibex_top.sv
index 90bc2357f7..d58086d4cf 100644
--- a/rtl/ibex_top.sv
+++ b/rtl/ibex_top.sv
@@ -21,6 +21,7 @@ module ibex_top import ibex_pkg::*; #(
   parameter bit          RV32E            = 1'b0,
   parameter rv32m_e      RV32M            = RV32MFast,
   parameter rv32b_e      RV32B            = RV32BNone,
+  parameter rv32zk_e     RV32Zk           = RV32Zkn,
   parameter regfile_e    RegFile          = RegFileFF,
   parameter bit          BranchTargetALU  = 1'b0,
   parameter bit          WritebackStage   = 1'b0,
@@ -195,6 +196,7 @@ module ibex_top import ibex_pkg::*; #(
     .RV32E            (RV32E),
     .RV32M            (RV32M),
     .RV32B            (RV32B),
+    .RV32Zk           (RV32Zk),
     .BranchTargetALU  (BranchTargetALU),
     .ICache           (ICache),
     .ICacheECC        (ICacheECC),
@@ -659,6 +661,7 @@ module ibex_top import ibex_pkg::*; #(
       .RV32E            (RV32E),
       .RV32M            (RV32M),
       .RV32B            (RV32B),
+      .RV32Zk           (RV32Zk),
       .BranchTargetALU  (BranchTargetALU),
       .ICache           (ICache),
       .ICacheECC        (ICacheECC),

From 0e43b0e327b3aecd2b93cef52b4d3ff6601004f7 Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Tue, 23 Nov 2021 14:42:43 +0000
Subject: [PATCH 02/21] Zk: add implementation for the zkh extension

---
 rtl/ibex_zk.sv | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100644 rtl/ibex_zk.sv

diff --git a/rtl/ibex_zk.sv b/rtl/ibex_zk.sv
new file mode 100644
index 0000000000..d7efc2c3b3
--- /dev/null
+++ b/rtl/ibex_zk.sv
@@ -0,0 +1,95 @@
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Zk Extension unit
+ *
+ * Purely combinational: operator_i is decoded into one select per ZKN_* operator, every
+ * result is computed in parallel and the selected one is AND/OR-muxed onto result_o.
+ *
+ * zk_val_o is high when operator_i is one of the ZKN_* operators handled here; result_o is
+ * zero otherwise. Both outputs are tied to zero when RV32Zk != RV32Zkn.
+ */
+module ibex_zk #(
+  parameter ibex_pkg::rv32zk_e RV32Zk = ibex_pkg::RV32ZkNone
+) (
+  input  ibex_pkg::alu_op_e operator_i,
+  input  logic [31:0]       operand_a_i,
+  input  logic [31:0]       operand_b_i,
+
+  output logic [31:0]       result_o,
+  output logic              zk_val_o
+);
+  import ibex_pkg::*;
+`define RORI32(a,b) ((a >> b) | (a << 32-b))
+`define SRLI32(a,b) ((a >> b)              )
+`define SLLI32(a,b) ((a << b)              )
+
+  if (RV32Zk == RV32Zkn) begin : gen_zkn
+    logic  sha256_sum0_sel, sha256_sum1_sel, sha256_sig0_sel, sha256_sig1_sel;
+    assign sha256_sum0_sel  = (operator_i == ZKN_SHA256SUM0);
+    assign sha256_sum1_sel  = (operator_i == ZKN_SHA256SUM1);
+    assign sha256_sig0_sel  = (operator_i == ZKN_SHA256SIG0);
+    assign sha256_sig1_sel  = (operator_i == ZKN_SHA256SIG1);
+
+    logic  sha512_sum0r_sel, sha512_sum1r_sel;
+    logic  sha512_sig0l_sel, sha512_sig1l_sel;
+    logic  sha512_sig0h_sel, sha512_sig1h_sel;
+    assign sha512_sum0r_sel = (operator_i == ZKN_SHA512SUM0R);
+    assign sha512_sum1r_sel = (operator_i == ZKN_SHA512SUM1R);
+    assign sha512_sig0l_sel = (operator_i == ZKN_SHA512SIG0L);
+    assign sha512_sig0h_sel = (operator_i == ZKN_SHA512SIG0H);
+    assign sha512_sig1l_sel = (operator_i == ZKN_SHA512SIG1L);
+    assign sha512_sig1h_sel = (operator_i == ZKN_SHA512SIG1H);
+
+    // SHA-256 helpers: XOR of rotated/shifted copies of operand_a_i only.
+    logic[31:0]  sha256_sum0, sha256_sum1, sha256_sig0, sha256_sig1;
+    assign sha256_sig0  = `RORI32(operand_a_i, 7) ^ `RORI32(operand_a_i,18) ^ `SRLI32(operand_a_i, 3);
+    assign sha256_sig1  = `RORI32(operand_a_i,17) ^ `RORI32(operand_a_i,19) ^ `SRLI32(operand_a_i,10);
+    assign sha256_sum0  = `RORI32(operand_a_i, 2) ^ `RORI32(operand_a_i,13) ^ `RORI32(operand_a_i,22);
+    assign sha256_sum1  = `RORI32(operand_a_i, 6) ^ `RORI32(operand_a_i,11) ^ `RORI32(operand_a_i,25);
+
+    // SHA-512 helpers: XOR of shifted operand_a_i and operand_b_i terms.
+    logic[31:0]  sha512_sum0r, sha512_sum1r;
+    logic[31:0]  sha512_sig0l, sha512_sig1l;
+    logic[31:0]  sha512_sig0h, sha512_sig1h;
+    assign sha512_sum0r = `SLLI32(operand_a_i,25)^`SLLI32(operand_a_i,30)^`SRLI32(operand_a_i,28)^
+                          `SRLI32(operand_b_i, 7)^`SRLI32(operand_b_i, 2)^`SLLI32(operand_b_i, 4);
+    assign sha512_sum1r = `SLLI32(operand_a_i,23)^`SRLI32(operand_a_i,14)^`SRLI32(operand_a_i,18)^
+                          `SRLI32(operand_b_i, 9)^`SLLI32(operand_b_i,18)^`SLLI32(operand_b_i,14);
+    assign sha512_sig0l = `SRLI32(operand_a_i, 1)^`SRLI32(operand_a_i, 7)^`SRLI32(operand_a_i, 8)^
+                          `SLLI32(operand_b_i,31)^`SLLI32(operand_b_i,25)^`SLLI32(operand_b_i,24);
+    assign sha512_sig0h = `SRLI32(operand_a_i, 1)^`SRLI32(operand_a_i, 7)^`SRLI32(operand_a_i, 8)^
+                          `SLLI32(operand_b_i,31)                        ^`SLLI32(operand_b_i,24);
+    assign sha512_sig1l = `SLLI32(operand_a_i, 3)^`SRLI32(operand_a_i, 6)^`SRLI32(operand_a_i,19)^
+                          `SRLI32(operand_b_i,29)^`SLLI32(operand_b_i,26)^`SLLI32(operand_b_i,13);
+    assign sha512_sig1h = `SLLI32(operand_a_i, 3)^`SRLI32(operand_a_i, 6)^`SRLI32(operand_a_i,19)^
+                          `SRLI32(operand_b_i,29)                        ^`SLLI32(operand_b_i,13);
+
+    assign zk_val_o     = |{sha256_sum0_sel, sha256_sum1_sel, sha256_sig0_sel, sha256_sig1_sel,
+                           sha512_sum0r_sel, sha512_sum1r_sel,
+                           sha512_sig0l_sel, sha512_sig1l_sel,
+                           sha512_sig0h_sel, sha512_sig1h_sel  };
+    assign result_o     = {32{sha256_sig0_sel }} & sha256_sig0  |
+                          {32{sha256_sig1_sel }} & sha256_sig1  |
+                          {32{sha256_sum0_sel }} & sha256_sum0  |
+                          {32{sha256_sum1_sel }} & sha256_sum1  |
+                          {32{sha512_sum0r_sel}} & sha512_sum0r |
+                          {32{sha512_sum1r_sel}} & sha512_sum1r |
+                          {32{sha512_sig0l_sel}} & sha512_sig0l |
+                          {32{sha512_sig0h_sel}} & sha512_sig0h |
+                          {32{sha512_sig1l_sel}} & sha512_sig1l |
+                          {32{sha512_sig1h_sel}} & sha512_sig1h ;
+  end else begin : gen_no_zkn
+    // No Zkn datapath in this configuration: drive the outputs so they are never floating.
+    assign zk_val_o = 1'b0;
+    assign result_o = '0;
+  end
+
+`undef RORI32
+`undef SRLI32
+`undef SLLI32
+
+endmodule

From 033900c68ddc613dd82a1bc76e9f460dc5cb0309 Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Wed, 24 Nov 2021 10:21:10 +0000
Subject: [PATCH 03/21] Zk: add support for the aes instructions

---
 rtl/ibex_decoder.sv | 20 ++++++++++++++++++++
 rtl/ibex_pkg.sv     | 20 ++++++++++++++++++--
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/rtl/ibex_decoder.sv b/rtl/ibex_decoder.sv
index be83356b76..90cc9f8c71 100644
--- a/rtl/ibex_decoder.sv
+++ b/rtl/ibex_decoder.sv
@@ -457,6 +457,8 @@ module ibex_decoder #(
         rf_we           = 1'b1;
         if ({instr[26], instr[13:12]} == {1'b1, 2'b01}) begin
           illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // cmix / cmov / fsl / fsr
+        end else if ({instr[29:28],instr[25], instr[14:12]} == {3'b101, 3'b000}) begin
+          illegal_insn = (RV32Zk == RV32Zkn) ? 1'b0 : 1'b1; // aes32ds / aes32dsm / aes32es / aes32esm
         end else begin
           unique case ({instr[31:25], instr[14:12]})
             // RV32I ALU operations
@@ -1073,6 +1075,24 @@ module ibex_decoder #(
             {7'b010_1110, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG0H; // sha512_sig0h
             {7'b010_1111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG1H; // sha512_sig1h
 
+            // RV32Zk zkde
+            {7'b001_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB0; // aes32esb0
+            {7'b011_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB1; // aes32esb1
+            {7'b101_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB2; // aes32esb2
+            {7'b111_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB3; // aes32esb3
+            {7'b001_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB0; // aes32esmb0
+            {7'b011_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB1; // aes32esmb1
+            {7'b101_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB2; // aes32esmb2
+            {7'b111_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB3; // aes32esmb3
+            {7'b001_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB0; // aes32dsb0
+            {7'b011_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB1; // aes32dsb1
+            {7'b101_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB2; // aes32dsb2
+            {7'b111_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB3; // aes32dsb3
+            {7'b001_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB0; // aes32dsmb0
+            {7'b011_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB1; // aes32dsmb1
+            {7'b101_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB2; // aes32dsmb2
+            {7'b111_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB3; // aes32dsmb3
+
             // RV32M instructions, all use the same ALU operation
             {7'b000_0001, 3'b000}: begin // mul
               alu_operator_o = ALU_ADD;
diff --git a/rtl/ibex_pkg.sv b/rtl/ibex_pkg.sv
index 69df1e28ea..7d1f484006 100644
--- a/rtl/ibex_pkg.sv
+++ b/rtl/ibex_pkg.sv
@@ -185,7 +185,7 @@ package ibex_pkg;
     ALU_CRC32_W,
     ALU_CRC32C_W,
 
-    // Zkn/Zks
+    // Zkn
     ZKN_SHA256SUM0,
     ZKN_SHA256SUM1,
     ZKN_SHA256SIG0,
@@ -195,7 +195,23 @@ package ibex_pkg;
     ZKN_SHA512SIG0L,
     ZKN_SHA512SIG1L,
     ZKN_SHA512SIG0H,
-    ZKN_SHA512SIG1H
+    ZKN_SHA512SIG1H,
+    ZKN_AES32DSB0,
+    ZKN_AES32DSB1,
+    ZKN_AES32DSB2,
+    ZKN_AES32DSB3,
+    ZKN_AES32DSMB0,
+    ZKN_AES32DSMB1,
+    ZKN_AES32DSMB2,
+    ZKN_AES32DSMB3,
+    ZKN_AES32ESB0,
+    ZKN_AES32ESB1,
+    ZKN_AES32ESB2,
+    ZKN_AES32ESB3,
+    ZKN_AES32ESMB0,
+    ZKN_AES32ESMB1,
+    ZKN_AES32ESMB2,
+    ZKN_AES32ESMB3
   } alu_op_e;
 
   typedef enum logic [1:0] {

From dee7228847dc418837208d3a10eb9bc31fad0d94 Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Wed, 24 Nov 2021 11:19:40 +0000
Subject: [PATCH 04/21] Zk: add the implementation for AES instructions

---
 rtl/ibex_zk.sv | 77 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 75 insertions(+), 2 deletions(-)

diff --git a/rtl/ibex_zk.sv b/rtl/ibex_zk.sv
index d7efc2c3b3..1693c761ec 100644
--- a/rtl/ibex_zk.sv
+++ b/rtl/ibex_zk.sv
@@ -21,7 +21,79 @@ module ibex_zk #(
 `define SRLI32(a,b) ((a >> b)              )
 `define SLLI32(a,b) ((a << b)              )
 
+// Multiply by 2 in GF(2^8): shift left by one and, if bit 7 was set, reduce by XORing
+// with 8'h1b. The value is returned directly rather than through a local variable,
+// because a local named after the function would collide with its implicit result.
+function automatic logic [7:0] xtime2(logic [7:0] a);
+    return {a[6:0], 1'b0} ^ (a[7] ? 8'h1b : 8'h00);
+endfunction
+
+// Pared-down multiply in GF(2^8): returns a * b for a 4-bit multiplier b.
+// The product is the XOR of a, 2a, 4a and 8a (repeated xtime2), one term per set bit
+// of b. Zero terms are sized 8'h00 so every operand of the XOR is 8 bits wide.
+function automatic logic [7:0] xtimeN(logic [7:0] a, logic [3:0] b);
+    return (b[0] ?                      a   : 8'h00) ^
+           (b[1] ? xtime2(              a)  : 8'h00) ^
+           (b[2] ? xtime2(xtime2(       a)) : 8'h00) ^
+           (b[3] ? xtime2(xtime2(xtime2(a))): 8'h00);
+endfunction
+
   if (RV32Zk == RV32Zkn) begin : gen_zkn
+    logic bs0, bs1, bs2, bs3; //byte select in aes instructions
+    assign bs0 = (operator_i == ZKN_AES32DSB0) || (operator_i == ZKN_AES32DSMB0) ||
+                 (operator_i == ZKN_AES32ESB0) || (operator_i == ZKN_AES32ESMB0) ;
+    assign bs1 = (operator_i == ZKN_AES32DSB1) || (operator_i == ZKN_AES32DSMB1) ||
+                 (operator_i == ZKN_AES32ESB1) || (operator_i == ZKN_AES32ESMB1) ;
+    assign bs2 = (operator_i == ZKN_AES32DSB2) || (operator_i == ZKN_AES32DSMB2) ||
+                 (operator_i == ZKN_AES32ESB2) || (operator_i == ZKN_AES32ESMB2) ;
+    assign bs3 = (operator_i == ZKN_AES32DSB3) || (operator_i == ZKN_AES32DSMB3) ||
+                 (operator_i == ZKN_AES32ESB3) || (operator_i == ZKN_AES32ESMB3) ;
+
+    logic decs_sel, encs_sel, decsm_sel, encsm_sel; //operation select in aes instructions
+    assign decs_sel  = (operator_i == ZKN_AES32DSB0)  || (operator_i == ZKN_AES32DSB1)  ||
+                       (operator_i == ZKN_AES32DSB2)  || (operator_i == ZKN_AES32DSB3)  ;
+    assign encs_sel  = (operator_i == ZKN_AES32ESB0)  || (operator_i == ZKN_AES32ESB1)  ||
+                       (operator_i == ZKN_AES32ESB2)  || (operator_i == ZKN_AES32ESB3)  ;
+    assign decsm_sel = (operator_i == ZKN_AES32DSMB0) || (operator_i == ZKN_AES32DSMB1) ||
+                       (operator_i == ZKN_AES32DSMB2) || (operator_i == ZKN_AES32DSMB3) ;
+    assign encsm_sel = (operator_i == ZKN_AES32ESMB0) || (operator_i == ZKN_AES32ESMB1) ||
+                       (operator_i == ZKN_AES32ESMB2) || (operator_i == ZKN_AES32ESMB3) ;
+    logic  aes32_sel;
+    assign aes32_sel = decs_sel || encs_sel || encsm_sel || decsm_sel;
+
+    logic  [7:0] sel_byte;
+    assign       sel_byte = {8{bs0}} & operand_b_i[ 7: 0] |
+                            {8{bs1}} & operand_b_i[15: 8] |
+                            {8{bs2}} & operand_b_i[23:16] |
+                            {8{bs3}} & operand_b_i[31:24] ;
+
+    logic  dec, mix;
+    assign dec      = decs_sel  || decsm_sel  ;
+    assign mix      = encsm_sel || decsm_sel  ;
+
+    logic [7:0] sbox_out;
+    // SBOX instances
+    ibex_aes_sbox  i_aes_sbox(
+        .fw (~dec            ),
+        .in (sel_byte    ),
+        .fx (sbox_out)
+    );
+
+    logic [7:0] mix_b0, mix_b1, mix_b2, mix_b3;
+    assign mix_b3 =       xtimeN(sbox_out, (dec ? 11  : 3))            ;
+    assign mix_b2 = dec ? xtimeN(sbox_out, (           13)) : sbox_out ;
+    assign mix_b1 = dec ? xtimeN(sbox_out, (            9)) : sbox_out ;
+    assign mix_b0 =       xtimeN(sbox_out, (dec ? 14  : 2))            ;
+
+    logic [31:0] mixed, sbox_mix, rotated;
+    assign mixed    = {mix_b3, mix_b2, mix_b1, mix_b0};
+    assign sbox_mix = mix ? mixed : {24'b0, sbox_out};
+    assign rotated  = {32{bs0}} & {sbox_mix                        } |
+                      {32{bs1}} & {sbox_mix[23:0], sbox_mix[31:24] } |
+                      {32{bs2}} & {sbox_mix[15:0], sbox_mix[31:16] } |
+                      {32{bs3}} & {sbox_mix[ 7:0], sbox_mix[31: 8] } ;
+
+    // sha2 instructions
     logic  sha256_sum0_sel, sha256_sum1_sel, sha256_sig0_sel, sha256_sig1_sel;
     assign sha256_sum0_sel  = (operator_i == ZKN_SHA256SUM0);
     assign sha256_sum1_sel  = (operator_i == ZKN_SHA256SUM1);
@@ -64,8 +136,9 @@ module ibex_zk #(
     assign zk_val_o     = |{sha256_sum0_sel, sha256_sum1_sel, sha256_sig0_sel, sha256_sig1_sel,
                            sha512_sum0r_sel, sha512_sum1r_sel,
                            sha512_sig0l_sel, sha512_sig1l_sel,
-                           sha512_sig0h_sel, sha512_sig1h_sel  };
-    assign result_o     = {32{sha256_sig0_sel }} & sha256_sig0  |
+                           sha512_sig0h_sel, sha512_sig1h_sel, aes32_sel};
+    assign result_o     = {32{aes32_sel       }} & (rotated ^ operand_a_i) |
+                          {32{sha256_sig0_sel }} & sha256_sig0  |
                           {32{sha256_sig1_sel }} & sha256_sig1  |
                           {32{sha256_sum0_sel }} & sha256_sum0  |
                           {32{sha256_sum1_sel }} & sha256_sum1  |

From 4a9472eadfe1d357155ae2dbac942ca81596d131 Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Wed, 24 Nov 2021 11:20:10 +0000
Subject: [PATCH 05/21] Zk: add the implementation for AES instructions

---
 rtl/ibex_aes_sbox.sv | 280 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 280 insertions(+)
 create mode 100644 rtl/ibex_aes_sbox.sv

diff --git a/rtl/ibex_aes_sbox.sv b/rtl/ibex_aes_sbox.sv
new file mode 100644
index 0000000000..9197c3ef3a
--- /dev/null
+++ b/rtl/ibex_aes_sbox.sv
@@ -0,0 +1,280 @@
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * AES Sbox unit
+ */
+module ibex_aes_sbox (
+input  logic       fw,
+input  logic [7:0] in,
+output logic [7:0] fx
+
+);
+
+// aes_sbox_top: forward S-box top linear layer; derives 21 XOR-only signals from the input byte x.
+function automatic logic [20:0] aes_sbox_top(logic [7:0] x);
+    logic  y20;
+    logic  y19, y18, y17, y16, y15, y14, y13, y12, y11, y10;
+    logic  y9,  y8,  y7,  y6,  y5,  y4,  y3,  y2,  y1,  y0 ;
+    logic  t5,  t4,  t3,  t2,  t1,  t0 ;
+    // Plain blocking assignments: procedural 'assign' must not target automatic variables.
+    y0    = x[ 0] ;
+    y1    = x[ 7] ^     x[ 4];
+    y2    = x[ 7] ^     x[ 2];
+    y3    = x[ 7] ^     x[ 1];
+    y4    = x[ 4] ^     x[ 2];
+    t0    = x[ 3] ^     x[ 1];
+    y5    = y1    ^     t0   ;
+    t1    = x[ 6] ^     x[ 5];
+    y6    = x[ 0] ^     y5   ;
+    y7    = x[ 0] ^     t1   ;
+    y8    = y5    ^     t1   ;
+    t2    = x[ 6] ^     x[ 2];
+    t3    = x[ 5] ^     x[ 2];
+    y9    = y3    ^     y4   ;
+    y10   = y5    ^     t2   ;
+    y11   = t0    ^     t2   ;
+    y12   = t0    ^     t3   ;
+    y13   = y7    ^     y12  ;
+    t4    = x[ 4] ^     x[ 0];
+    y14   = t1    ^     t4   ;
+    y15   = y1    ^     y14  ;
+    t5    = x[ 1] ^     x[ 0];
+    y16   = t1    ^     t5   ;
+    y17   = y2    ^     y16  ;
+    y18   = y2    ^     y8   ;
+    y19   = y15   ^     y13  ;
+    y20   = y1    ^     t3   ;
+    // Result packing: y20 lands in bit 20 (MSB), y0 in bit 0 (LSB).
+    return {y20, y19, y18, y17, y16, y15, y14, y13, y12, y11,
+            y10, y9,  y8,  y7,  y6,  y5,  y4,  y3,  y2,  y1,  y0};
+endfunction
+
+// aes_sbox_out: forward S-box bottom linear layer; folds the 18 middle-layer bits in x into the output byte.
+function automatic logic [7:0] aes_sbox_out(logic [17:0] x);
+    logic [7:0] y;
+    logic  t29, t28, t27, t26, t25, t24, t23, t22, t21, t20;
+    logic  t19, t18, t17, t16, t15, t14, t13, t12, t11, t10;
+    logic  t9,  t8,  t7,  t6,  t5,  t4,  t3,  t2,  t1,  t0 ;
+    t0   = x[11] ^  x[12];  // blocking assignments: procedural 'assign' must not target automatic variables
+    t1   = x[0] ^   x[6];
+    t2   = x[14] ^  x[16];
+    t3   = x[15] ^  x[5];
+    t4   = x[4] ^   x[8];
+    t5   = x[17] ^  x[11];
+    t6   = x[12] ^  t5;
+    t7   = x[14] ^  t3;
+    t8   = x[1] ^   x[9];
+    t9   = x[2] ^   x[3];
+    t10  = x[3] ^   t4;
+    t11  = x[10] ^  t2;
+    t12  = x[16] ^  x[1];
+    t13  = x[0] ^   t0;
+    t14  = x[2] ^   x[11];
+    t15  = x[5] ^   t1;
+    t16  = x[6] ^   t0;
+    t17  = x[7] ^   t1;
+    t18  = x[8] ^   t8;
+    t19  = x[13] ^  t4;
+    t20  = t0 ^     t1;
+    t21  = t1 ^     t7;
+    t22  = t3 ^     t12;
+    t23  = t18 ^    t2;
+    t24  = t15 ^    t9;
+    t25  = t6 ^     t10;
+    t26  = t7 ^     t9;
+    t27  = t8 ^     t10;
+    t28  = t11 ^    t14;
+    t29  = t11 ^    t17;
+    y[0] = t6 ^~  t23;      // XNOR: bits 0, 1, 5 and 6 are inverted, so an all-zero x yields 8'h63
+    y[1] = t13 ^~ t27;
+    y[2] = t25 ^  t29;
+    y[3] = t20 ^  t22;
+    y[4] = t6 ^   t21;
+    y[5] = t19 ^~ t28;
+    y[6] = t16 ^~ t26;
+    y[7] = t6 ^   t24;
+    return  y;
+endfunction
+
+
+// aes_sbox_inv_mid: non-linear (AND/XOR) middle layer shared by the forward and inverse S-box paths.
+function automatic logic [17:0] aes_sbox_inv_mid(logic [20:0] x);
+    logic [17:0] y;
+    logic  t45, t44, t43, t42, t41, t40;
+    logic  t39, t38, t37, t36, t35, t34, t33, t32, t31, t30;
+    logic  t29, t28, t27, t26, t25, t24, t23, t22, t21, t20;
+    logic  t19, t18, t17, t16, t15, t14, t13, t12, t11, t10;
+    logic  t9,  t8,  t7,  t6,  t5,  t4,  t3,  t2,  t1,  t0 ;
+    t0  = x[ 3] ^     x[12];  // blocking assignments: procedural 'assign' must not target automatic variables
+    t1  = x[ 9] &     x[ 5];
+    t2  = x[17] &     x[ 6];
+    t3  = x[10] ^     t1   ;
+    t4  = x[14] &     x[ 0];
+    t5  = t4    ^     t1   ;
+    t6  = x[ 3] &     x[12];
+    t7  = x[16] &     x[ 7];
+    t8  = t0    ^     t6   ;
+    t9  = x[15] &     x[13];
+    t10 = t9    ^     t6   ;
+    t11 = x[ 1] &     x[11];
+    t12 = x[ 4] &     x[20];
+    t13 = t12   ^     t11  ;
+    t14 = x[ 2] &     x[ 8];
+    t15 = t14   ^     t11  ;
+    t16 = t3    ^     t2   ;
+    t17 = t5    ^     x[18];
+    t18 = t8    ^     t7   ;
+    t19 = t10   ^     t15  ;
+    t20 = t16   ^     t13  ;
+    t21 = t17   ^     t15  ;
+    t22 = t18   ^     t13  ;
+    t23 = t19   ^     x[19];
+    t24 = t22   ^     t23  ;
+    t25 = t22   &     t20  ;
+    t26 = t21   ^     t25  ;
+    t27 = t20   ^     t21  ;
+    t28 = t23   ^     t25  ;
+    t29 = t28   &     t27  ;
+    t30 = t26   &     t24  ;
+    t31 = t20   &     t23  ;
+    t32 = t27   &     t31  ;
+    t33 = t27   ^     t25  ;
+    t34 = t21   &     t22  ;
+    t35 = t24   &     t34  ;
+    t36 = t24   ^     t25  ;
+    t37 = t21   ^     t29  ;
+    t38 = t32   ^     t33  ;
+    t39 = t23   ^     t30  ;
+    t40 = t35   ^     t36  ;
+    t41 = t38   ^     t40  ;
+    t42 = t37   ^     t39  ;
+    t43 = t37   ^     t38  ;
+    t44 = t39   ^     t40  ;
+    t45 = t42   ^     t41  ;
+    // Output stage: each y bit is one of t37..t45 ANDed with a single input bit of x.
+    y[ 0] = t38 &     x[ 7];
+    y[ 1] = t37 &     x[13];
+    y[ 2] = t42 &     x[11];
+    y[ 3] = t45 &     x[20];
+    y[ 4] = t41 &     x[ 8];
+    y[ 5] = t44 &     x[ 9];
+    y[ 6] = t40 &     x[17];
+    y[ 7] = t39 &     x[14];
+    y[ 8] = t43 &     x[ 3];
+    y[ 9] = t38 &     x[16];
+    y[10] = t37 &     x[15];
+    y[11] = t42 &     x[ 1];
+    y[12] = t45 &     x[ 4];
+    y[13] = t41 &     x[ 2];
+    y[14] = t44 &     x[ 5];
+    y[15] = t40 &     x[ 6];
+    y[16] = t39 &     x[ 0];
+    y[17] = t43 &     x[12];
+
+    return y;
+endfunction
+
+// aes_inv_sbox_top: inverse S-box top linear layer; derives 21 XOR/XNOR signals from the input byte x.
+function automatic logic [20:0] aes_inv_sbox_top(logic [7:0] x);
+    logic  y20;
+    logic  y19, y18, y17, y16, y15, y14, y13, y12, y11, y10;
+    logic  y9,  y8,  y7,  y6,  y5,  y4,  y3,  y2,  y1,  y0 ;
+    logic  t4,  t3,  t2,  t1,  t0 ;
+    y17 = x[ 7] ^  x[ 4];  // blocking assignments: procedural 'assign' must not target automatic variables
+    y16 = x[ 6] ^~ x[ 4];
+    y2  = x[ 7] ^~ x[ 6];
+    y1  = x[ 4] ^  x[ 3];
+    y18 = x[ 3] ^~ x[ 0];
+    t0  = x[ 1] ^  x[ 0];
+    y6  = x[ 6] ^~ y17 ;
+    y14 = y16   ^  t0;
+    y7  = x[ 0] ^~ y1;
+    y8  = y2    ^  y18;
+    y9  = y2    ^  t0;
+    y3  = y1    ^  t0;
+    y19 = x[ 5] ^~ y1;
+    t1  = x[ 6] ^  x[ 1];
+    y13 = x[ 5] ^~ y14;
+    y15 = y18   ^  t1;
+    y4  = x[ 3] ^  y6;
+    t2  = x[ 5] ^~ x[ 2];
+    t3  = x[ 2] ^~ x[ 1];
+    t4  = x[ 5] ^~ x[ 3];
+    y5  = y16   ^  t2 ;
+    y12 = t1    ^  t4 ;
+    y20 = y1    ^  t3 ;
+    y11 = y8    ^  y20 ;
+    y10 = y8    ^  t3 ;
+    y0  = x[ 7] ^  t2 ;
+    // Result packing: y20 lands in bit 20 (MSB), y0 in bit 0 (LSB).
+    return {y20, y19, y18, y17, y16, y15, y14, y13, y12, y11,
+            y10, y9,  y8,  y7,  y6,  y5,  y4,  y3,  y2,  y1,  y0};
+endfunction
+
+// aes_inv_sbox_out: inverse S-box bottom linear layer; XOR-only fold of the 18 middle-layer bits in x.
+function automatic logic [7:0] aes_inv_sbox_out(logic [17:0] x);
+    logic [7:0] y;
+    logic  t29, t28, t27, t26, t25, t24, t23, t22,      t20;
+    logic  t19, t18, t17, t16, t15, t14, t13, t12, t11, t10;
+    logic  t9,  t8,  t7,  t6,  t5,  t4,  t3,  t2,  t1,  t0 ;
+    t0  = x[ 2] ^     x[11];  // blocking assignments: procedural 'assign' must not target automatic variables
+    t1  = x[ 8] ^     x[ 9];
+    t2  = x[ 4] ^     x[12];
+    t3  = x[15] ^     x[ 0];
+    t4  = x[16] ^     x[ 6];
+    t5  = x[14] ^     x[ 1];
+    t6  = x[17] ^     x[10];
+    t7  = t0    ^     t1   ;
+    t8  = x[ 0] ^     x[ 3];
+    t9  = x[ 5] ^     x[13];
+    t10 = x[ 7] ^     t4   ;
+    t11 = t0    ^     t3   ;
+    t12 = x[14] ^     x[16];
+    t13 = x[17] ^     x[ 1];
+    t14 = x[17] ^     x[12];
+    t15 = x[ 4] ^     x[ 9];
+    t16 = x[ 7] ^     x[11];
+    t17 = x[ 8] ^     t2 ;
+    t18 = x[13] ^     t5 ;
+    t19 = t2   ^     t3 ;
+    t20 = t4   ^     t6 ;
+    t22 = t2   ^     t7 ;
+    t23 = t7   ^     t8 ;
+    t24 = t5   ^     t7 ;
+    t25 = t6   ^     t10;
+    t26 = t9   ^     t11;
+    t27 = t10  ^     t18;
+    t28 = t11  ^     t25;
+    t29 = t15  ^     t20;
+    // Output bits: no inversions here, so an all-zero x yields 8'h00.
+    y[ 0] = t9  ^ t16;
+    y[ 1] = t14 ^ t23;
+    y[ 2] = t19 ^ t24;
+    y[ 3] = t23 ^ t27;
+    y[ 4] = t12 ^ t22;
+    y[ 5] = t17 ^ t28;
+    y[ 6] = t26 ^ t29;
+    y[ 7] = t13 ^ t22;
+    return  y;
+endfunction
+// Datapath: direction-specific top layer -> shared middle layer -> direction-specific output layer.
+logic [20:0] fwd_top, inv_top, top_box;
+assign fwd_top = aes_sbox_top(in);
+assign inv_top = aes_inv_sbox_top(in);
+assign top_box = (fw)? fwd_top : inv_top;
+// The same middle-layer function serves both directions.
+logic [17:0] mid;
+assign mid     = aes_sbox_inv_mid(top_box);
+// fw = 1 selects the forward output layer, fw = 0 the inverse one.
+logic [ 7:0] fwd_out, inv_out;
+assign fwd_out = aes_sbox_out(mid);
+assign inv_out = aes_inv_sbox_out(mid);
+assign fx      = (fw)? fwd_out : inv_out;
+
+endmodule
+
+
+

From f95dab2349221e69416e17488d4d5e4a952857b2 Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Wed, 24 Nov 2021 14:35:19 +0000
Subject: [PATCH 06/21] Zk: add support for decoding the SM3/4 instructions.

---
 rtl/ibex_decoder.sv | 38 +++++++++++++++++++++++++++-----------
 rtl/ibex_pkg.sv     | 14 +++++++++++++-
 rtl/ibex_top.sv     |  2 +-
 3 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/rtl/ibex_decoder.sv b/rtl/ibex_decoder.sv
index 90cc9f8c71..0d22a306a7 100644
--- a/rtl/ibex_decoder.sv
+++ b/rtl/ibex_decoder.sv
@@ -394,6 +394,8 @@ module ibex_decoder #(
                   7'b000_0001,                                                         // sha256sum1
                   7'b000_0010,                                                         // sha256sig0
                   7'b000_0011: illegal_insn = (RV32Zk == RV32Zkn) ? 1'b0 : 1'b1;       // sha256sig1
+                  7'b000_1000,                                                         // sm3p0
+                  7'b000_1001: illegal_insn = (RV32Zk == RV32Zks) ? 1'b0 : 1'b1;       // sm3p1
                   default: illegal_insn = 1'b1;
                 endcase
               end
@@ -457,8 +459,10 @@ module ibex_decoder #(
         rf_we           = 1'b1;
         if ({instr[26], instr[13:12]} == {1'b1, 2'b01}) begin
           illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // cmix / cmov / fsl / fsr
-        end else if ({instr[29:28],instr[25], instr[14:12]} == {3'b101, 3'b000}) begin
+        end else if ({instr[29:28],instr[25], instr[14:12]} == {3'b10__1, 3'b000}) begin
           illegal_insn = (RV32Zk == RV32Zkn) ? 1'b0 : 1'b1; // aes32ds / aes32dsm / aes32es / aes32esm
+        end else if ({instr[29:27],instr[25], instr[14:12]} == {4'b110_0, 3'b000}) begin
+          illegal_insn = (RV32Zk == RV32Zks) ? 1'b0 : 1'b1; // sm4ed / sm4ks
         end else begin
           unique case ({instr[31:25], instr[14:12]})
             // RV32I ALU operations
@@ -881,12 +885,14 @@ module ibex_decoder #(
                   default: ;
                 endcase
               end
-              5'b0_0010: begin                                                         // Zk, zkh
+              5'b0_0010: begin                                                         // Zk, zkh,zks
                 unique case (instr_alu[26:20])
                   7'b000_0000: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SUM0; // sha256sum0
                   7'b000_0001: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SUM1; // sha256sum1
                   7'b000_0010: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SIG0; // sha256sig0
                   7'b000_0011: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SIG1; // sha256sig1
+                  7'b000_1000: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM3P0;      // sm3p0
+                  7'b000_1001: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM3P1;      // sm3p1
                   default:     alu_operator_o = ALU_SLL;
                 endcase
               end
@@ -1075,24 +1081,34 @@ module ibex_decoder #(
             {7'b010_1110, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG0H; // sha512_sig0h
             {7'b010_1111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG1H; // sha512_sig1h
 
-            // RV32Zk zkde
-            {7'b001_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB0; // aes32esb0
-            {7'b011_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB1; // aes32esb1
-            {7'b101_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB2; // aes32esb2
-            {7'b111_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB3; // aes32esb3
+            // RV32Zk zkned
+            {7'b001_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB0;  // aes32esb0
+            {7'b011_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB1;  // aes32esb1
+            {7'b101_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB2;  // aes32esb2
+            {7'b111_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB3;  // aes32esb3
             {7'b001_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB0; // aes32esmb0
             {7'b011_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB1; // aes32esmb1
             {7'b101_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB2; // aes32esmb2
             {7'b111_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB3; // aes32esmb3
-            {7'b001_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB0; // aes32dsb0
-            {7'b011_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB1; // aes32dsb1
-            {7'b101_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB2; // aes32dsb2
-            {7'b111_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB3; // aes32dsb3
+            {7'b001_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB0;  // aes32dsb0
+            {7'b011_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB1;  // aes32dsb1
+            {7'b101_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB2;  // aes32dsb2
+            {7'b111_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB3;  // aes32dsb3
             {7'b001_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB0; // aes32dsmb0
             {7'b011_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB1; // aes32dsmb1
             {7'b101_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB2; // aes32dsmb2
             {7'b111_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB3; // aes32dsmb3
 
+            // RV32Zk zks
+            {7'b001_1000, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4EDB0; // sm4edb0
+            {7'b011_1000, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4EDB1; // sm4edb1
+            {7'b101_1000, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4EDB2; // sm4edb2
+            {7'b111_1000, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4EDB3; // sm4edb3
+            {7'b001_1010, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4KSB0; // sm4ksb0
+            {7'b011_1010, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4KSB1; // sm4ksb1
+            {7'b101_1010, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4KSB2; // sm4ksb2
+            {7'b111_1010, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4KSB3; // sm4ksb3
+
             // RV32M instructions, all use the same ALU operation
             {7'b000_0001, 3'b000}: begin // mul
               alu_operator_o = ALU_ADD;
diff --git a/rtl/ibex_pkg.sv b/rtl/ibex_pkg.sv
index 7d1f484006..180bba0117 100644
--- a/rtl/ibex_pkg.sv
+++ b/rtl/ibex_pkg.sv
@@ -211,7 +211,19 @@ package ibex_pkg;
     ZKN_AES32ESMB0,
     ZKN_AES32ESMB1,
     ZKN_AES32ESMB2,
-    ZKN_AES32ESMB3
+    ZKN_AES32ESMB3,
+
+    //Zks
+    ZKS_SM4EDB0,
+    ZKS_SM4EDB1,
+    ZKS_SM4EDB2,
+    ZKS_SM4EDB3,
+    ZKS_SM4KSB0,
+    ZKS_SM4KSB1,
+    ZKS_SM4KSB2,
+    ZKS_SM4KSB3,
+    ZKS_SM3P0,
+    ZKS_SM3P1
   } alu_op_e;
 
   typedef enum logic [1:0] {
diff --git a/rtl/ibex_top.sv b/rtl/ibex_top.sv
index d58086d4cf..0ab264e5d1 100644
--- a/rtl/ibex_top.sv
+++ b/rtl/ibex_top.sv
@@ -21,7 +21,7 @@ module ibex_top import ibex_pkg::*; #(
   parameter bit          RV32E            = 1'b0,
   parameter rv32m_e      RV32M            = RV32MFast,
   parameter rv32b_e      RV32B            = RV32BNone,
-  parameter rv32zk_e     RV32Zk           = RV32Zkn,
+  parameter rv32zk_e     RV32Zk           = RV32Zks,
   parameter regfile_e    RegFile          = RegFileFF,
   parameter bit          BranchTargetALU  = 1'b0,
   parameter bit          WritebackStage   = 1'b0,

From 45e7d2db44322c68b5bab7c3c8f8d0765a7edd39 Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Thu, 25 Nov 2021 11:46:05 +0000
Subject: [PATCH 07/21] ZK: add the implementation of SM3/4 instructions

---
 rtl/ibex_sm4_sbox.sv | 189 +++++++++++++++++++++++++++++++++++++++++++
 rtl/ibex_zk.sv       |  79 +++++++++++++++++-
 2 files changed, 266 insertions(+), 2 deletions(-)
 create mode 100644 rtl/ibex_sm4_sbox.sv

diff --git a/rtl/ibex_sm4_sbox.sv b/rtl/ibex_sm4_sbox.sv
new file mode 100644
index 0000000000..ebe5b1661b
--- /dev/null
+++ b/rtl/ibex_sm4_sbox.sv
@@ -0,0 +1,189 @@
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * SM4 Sbox unit
+ */
+module ibex_sm4_sbox (
+input  logic [7:0] in,
+output logic [7:0] fx
+);
+
+// sm4_sbox_top: SM4 S-box top linear layer; derives 21 XOR/XNOR signals from the input byte x.
+function automatic logic [20:0] sm4_sbox_top(logic [7:0] x);
+    logic  y20;
+    logic  y19, y18, y17, y16, y15, y14, y13, y12, y11, y10;
+    logic  y9,  y8,  y7,  y6,  y5,  y4,  y3,  y2,  y1,  y0 ;
+    logic  t6,  t5,  t4,  t3,  t2,  t1,  t0 ;
+    // Plain blocking assignments: procedural 'assign' must not target automatic variables.
+    y18 = x[ 2] ^  x[ 6];
+    t0  = x[ 3] ^  x[ 4];
+    t1  = x[ 2] ^  x[ 7];
+    t2  = x[ 7] ^  y18  ;
+    t3  = x[ 1] ^  t1   ;
+    t4  = x[ 6] ^  x[ 7];
+    t5  = x[ 0] ^  y18  ;
+    t6  = x[ 3] ^  x[ 6];
+    y10 = x[ 1] ^  y18;
+    y0  = x[ 5] ^~ y10;
+    y1  = t0    ^  t3 ;
+    y2  = x[ 0] ^  t0 ;
+    y4  = x[ 0] ^  t3 ;
+    y3  = x[ 3] ^  y4 ;
+    y5  = x[ 5] ^  t5 ;
+    y6  = x[ 0] ^~ x[ 1];
+    y7  = t0    ^~ y10;
+    y8  = t0    ^  t5 ;
+    y9  = x[ 3];
+    y11 = t0    ^  t4 ;
+    y12 = x[ 5] ^  t4 ;
+    y13 = x[ 5] ^~ y1 ;
+    y14 = x[ 4] ^~ t2 ;
+    y15 = x[ 1] ^~ t6 ;
+    y16 = x[ 0] ^~ t2 ;
+    y17 = t0    ^~ t2 ;
+    y19 = x[ 5] ^~ y14;
+    y20 = x[ 0] ^  t1 ;
+    // Result packing: y20 lands in bit 20 (MSB), y0 in bit 0 (LSB).
+    return {y20, y19, y18, y17, y16, y15, y14, y13, y12, y11,
+            y10, y9,  y8,  y7,  y6,  y5,  y4,  y3,  y2,  y1,  y0};
+endfunction
+
+// sm4_sbox_out: SM4 S-box bottom linear layer; folds the 18 middle-layer bits in x into the output byte.
+function automatic logic [7:0] sm4_sbox_out(logic [17:0] x);
+    logic [7:0] y;
+    logic  t29, t28, t27, t26, t25, t24, t23, t22, t21, t20;
+    logic  t19, t18, t17, t16, t15, t14, t13, t12, t11, t10;
+    logic  t9,  t8,  t7,  t6,  t5,  t4,  t3,  t2,  t1,  t0 ;
+    t0   = x[ 4] ^  x[ 7];  // blocking assignments: procedural 'assign' must not target automatic variables
+    t1   = x[13] ^  x[15];
+    t2   = x[ 2] ^  x[16];
+    t3   = x[ 6] ^  t0;
+    t4   = x[12] ^  t1;
+    t5   = x[ 9] ^  x[10];
+    t6   = x[11] ^  t2;
+    t7   = x[ 1] ^  t4;
+    t8   = x[ 0] ^  x[17];
+    t9   = x[ 3] ^  x[17];
+    t10  = x[ 8] ^  t3;
+    t11  = t2    ^  t5;
+    t12  = x[14] ^  t6;
+    t13  = t7    ^  t9;
+    t14  = x[ 0] ^  x[ 6];
+    t15  = x[ 7] ^  x[16];
+    t16  = x[ 5] ^  x[13];
+    t17  = x[ 3] ^  x[15];
+    t18  = x[10] ^  x[12];
+    t19  = x[ 9] ^  t1 ;
+    t20  = x[ 4] ^  t4 ;
+    t21  = x[14] ^  t3 ;
+    t22  = x[16] ^  t5 ;
+    t23  = t7    ^  t14;
+    t24  = t8    ^  t11;
+    t25  = t0    ^  t12;
+    t26  = t17   ^  t3 ;
+    t27  = t18   ^  t10;
+    t28  = t19   ^  t6 ;
+    t29  = t8    ^  t10;
+    y[0] = t11   ^~ t13;    // XNOR: bits 0, 1, 4, 6 and 7 are inverted, so an all-zero x yields 8'hD3
+    y[1] = t15   ^~ t23;
+    y[2] = t20   ^  t24;
+    y[3] = t16   ^  t25;
+    y[4] = t26   ^~ t22;
+    y[5] = t21   ^  t13;
+    y[6] = t27   ^~ t12;
+    y[7] = t28   ^~ t29;
+
+    return  y;
+endfunction
+
+
+// sm4_sbox_inv_mid: non-linear (AND/XOR) middle layer of the SM4 S-box, between the top and output layers.
+function automatic logic [17:0] sm4_sbox_inv_mid(logic [20:0] x);
+    logic [17:0] y;
+    logic  t45, t44, t43, t42, t41, t40;
+    logic  t39, t38, t37, t36, t35, t34, t33, t32, t31, t30;
+    logic  t29, t28, t27, t26, t25, t24, t23, t22, t21, t20;
+    logic  t19, t18, t17, t16, t15, t14, t13, t12, t11, t10;
+    logic  t9,  t8,  t7,  t6,  t5,  t4,  t3,  t2,  t1,  t0 ;
+    t0  = x[ 3] ^  x[12];  // blocking assignments: procedural 'assign' must not target automatic variables
+    t1  = x[ 9] &  x[ 5];
+    t2  = x[17] &  x[ 6];
+    t3  = x[10] ^  t1   ;
+    t4  = x[14] &  x[ 0];
+    t5  = t4    ^  t1   ;
+    t6  = x[ 3] &  x[12];
+    t7  = x[16] &  x[ 7];
+    t8  = t0    ^  t6   ;
+    t9  = x[15] &  x[13];
+    t10 = t9    ^  t6   ;
+    t11 = x[ 1] &  x[11];
+    t12 = x[ 4] &  x[20];
+    t13 = t12   ^  t11  ;
+    t14 = x[ 2] &  x[ 8];
+    t15 = t14   ^  t11  ;
+    t16 = t3    ^  t2   ;
+    t17 = t5    ^  x[18];
+    t18 = t8    ^  t7   ;
+    t19 = t10   ^  t15  ;
+    t20 = t16   ^  t13  ;
+    t21 = t17   ^  t15  ;
+    t22 = t18   ^  t13  ;
+    t23 = t19   ^  x[19];
+    t24 = t22   ^  t23  ;
+    t25 = t22   &  t20  ;
+    t26 = t21   ^  t25  ;
+    t27 = t20   ^  t21  ;
+    t28 = t23   ^  t25  ;
+    t29 = t28   &  t27  ;
+    t30 = t26   &  t24  ;
+    t31 = t20   &  t23  ;
+    t32 = t27   &  t31  ;
+    t33 = t27   ^  t25  ;
+    t34 = t21   &  t22  ;
+    t35 = t24   &  t34  ;
+    t36 = t24   ^  t25  ;
+    t37 = t21   ^  t29  ;
+    t38 = t32   ^  t33  ;
+    t39 = t23   ^  t30  ;
+    t40 = t35   ^  t36  ;
+    t41 = t38   ^  t40  ;
+    t42 = t37   ^  t39  ;
+    t43 = t37   ^  t38  ;
+    t44 = t39   ^  t40  ;
+    t45 = t42   ^  t41  ;
+    y[ 0] = t38 &  x[ 7];  // output stage: each y bit is one of t37..t45 ANDed with a single bit of x
+    y[ 1] = t37 &  x[13];
+    y[ 2] = t42 &  x[11];
+    y[ 3] = t45 &  x[20];
+    y[ 4] = t41 &  x[ 8];
+    y[ 5] = t44 &  x[ 9];
+    y[ 6] = t40 &  x[17];
+    y[ 7] = t39 &  x[14];
+    y[ 8] = t43 &  x[ 3];
+    y[ 9] = t38 &  x[16];
+    y[10] = t37 &  x[15];
+    y[11] = t42 &  x[ 1];
+    y[12] = t45 &  x[ 4];
+    y[13] = t41 &  x[ 2];
+    y[14] = t44 &  x[ 5];
+    y[15] = t40 &  x[ 6];
+    y[16] = t39 &  x[ 0];
+    y[17] = t43 &  x[12];
+
+    return y;
+endfunction
+// Datapath: top linear layer -> non-linear middle layer -> output linear layer.
+logic [20:0] t1;
+logic [17:0] t2;
+// t1 carries the 21 top-layer signals, t2 the 18 middle-layer bits.
+assign t1 = sm4_sbox_top(in);
+assign t2 = sm4_sbox_inv_mid(t1);
+assign fx = sm4_sbox_out(t2);
+
+endmodule
+
+
+
diff --git a/rtl/ibex_zk.sv b/rtl/ibex_zk.sv
index 1693c761ec..e5d3029e84 100644
--- a/rtl/ibex_zk.sv
+++ b/rtl/ibex_zk.sv
@@ -18,6 +18,7 @@ module ibex_zk #(
 );
   import ibex_pkg::*;
 `define RORI32(a,b) ((a >> b) | (a << 32-b))
+`define ROLI32(a,b) ((a << b) | (a >> 32-b))
 `define SRLI32(a,b) ((a >> b)              )
 `define SLLI32(a,b) ((a << b)              )
 
@@ -38,6 +39,9 @@ function automatic logic [7:0] xtimeN(logic [7:0] a, logic [3:0] b);
     return xtimeN;
 endfunction
 
+  logic        zkn_val;
+  logic [31:0] zkn_result;
+
   if (RV32Zk == RV32Zkn) begin : gen_zkn
     logic bs0, bs1, bs2, bs3; //byte select in aes instructions
     assign bs0 = (operator_i == ZKN_AES32DSB0) || (operator_i == ZKN_AES32DSMB0) ||
@@ -133,11 +137,11 @@ endfunction
     assign sha512_sig1h = `SLLI32(operand_a_i, 3)^`SRLI32(operand_a_i, 6)^`SRLI32(operand_a_i,19)^
                           `SRLI32(operand_b_i,29)                        ^`SLLI32(operand_b_i,13);
 
-    assign zk_val_o     = |{sha256_sum0_sel, sha256_sum1_sel, sha256_sig0_sel, sha256_sig1_sel,
+    assign zkn_val      = |{sha256_sum0_sel, sha256_sum1_sel, sha256_sig0_sel, sha256_sig1_sel,
                            sha512_sum0r_sel, sha512_sum1r_sel,
                            sha512_sig0l_sel, sha512_sig1l_sel,
                            sha512_sig0h_sel, sha512_sig1h_sel, aes32_sel};
-    assign result_o     = {32{aes32_sel       }} & (rotated ^ operand_a_i) |
+    assign zkn_result   = {32{aes32_sel       }} & (rotated ^ operand_a_i) |
                           {32{sha256_sig0_sel }} & sha256_sig0  |
                           {32{sha256_sig1_sel }} & sha256_sig1  |
                           {32{sha256_sum0_sel }} & sha256_sum0  |
@@ -148,9 +152,80 @@ endfunction
                           {32{sha512_sig0h_sel}} & sha512_sig0h |
                           {32{sha512_sig1l_sel}} & sha512_sig1l |
                           {32{sha512_sig1h_sel}} & sha512_sig1h ;
+  end else begin : no_gen_zkn
+    assign zkn_val    =  1'b0;
+    assign zkn_result = 32'd0;
+  end
+  // Zks (SM4 / SM3) result path; valid and result are tied to zero unless RV32Zk == RV32Zks.
+  logic        zks_val;
+  logic [31:0] zks_result;
+  if (RV32Zk == RV32Zks) begin : gen_zks
+    logic  sm4ed_sel, sm4ks_sel, sm3p0_sel, sm3p1_sel;
+    assign sm4ed_sel = (operator_i == ZKS_SM4EDB0) || (operator_i == ZKS_SM4EDB2) ||
+                       (operator_i == ZKS_SM4EDB1) || (operator_i == ZKS_SM4EDB3) ;
+    assign sm4ks_sel = (operator_i == ZKS_SM4KSB0) || (operator_i == ZKS_SM4KSB2) ||
+                       (operator_i == ZKS_SM4KSB1) || (operator_i == ZKS_SM4KSB3) ;
+    assign sm3p0_sel = (operator_i == ZKS_SM3P0);
+    assign sm3p1_sel = (operator_i == ZKS_SM3P1);
+
+    logic zks_bs0, zks_bs1, zks_bs2, zks_bs3; //byte select in sm4 instructions
+    assign zks_bs0 = (operator_i == ZKS_SM4EDB0) || (operator_i == ZKS_SM4KSB0) ;
+    assign zks_bs1 = (operator_i == ZKS_SM4EDB1) || (operator_i == ZKS_SM4KSB1) ;
+    assign zks_bs2 = (operator_i == ZKS_SM4EDB2) || (operator_i == ZKS_SM4KSB2) ;
+    assign zks_bs3 = (operator_i == ZKS_SM4EDB3) || (operator_i == ZKS_SM4KSB3) ;
+    logic  [7:0] sbox_in;
+    assign       sbox_in = {8{zks_bs0}} & operand_b_i[ 7: 0] |
+                           {8{zks_bs1}} & operand_b_i[15: 8] |
+                           {8{zks_bs2}} & operand_b_i[23:16] |
+                           {8{zks_bs3}} & operand_b_i[31:24] ;
+    logic [ 7:0] sm4_sbox_out;
+    // Submodule - SBox
+    ibex_sm4_sbox ism4_sbox (
+      .in (sbox_in),
+      .fx (sm4_sbox_out)
+    );
+
+    logic [31:0] s;
+    assign s     = {24'b0, sm4_sbox_out};
+
+    // ED Instruction
+    logic [31:0] ed1, ed2;
+    assign ed1   = s   ^  (s           <<  8) ^ (s << 2) ^ (s << 18);
+    assign ed2   = ed1 ^ ((s & 32'h3F) << 26) ^ ((s & 32'hC0) << 10);
+
+    // KS Instruction
+    logic [31:0] ks1, ks2;
+    assign ks1   = s   ^ ((s & 32'h07) << 29) ^ ((s & 32'hFE) <<  7);
+    assign ks2   = ks1 ^ ((s & 32'h01) << 23) ^ ((s & 32'hF8) << 13);
+
+    // Rotate and XOR result
+    logic [31:0] rot_in, rot_out, sm4;
+    assign rot_in  = sm4ks_sel ? ks2 : ed2;
+    assign rot_out = {32{zks_bs0}} & {rot_in                      } |
+                     {32{zks_bs1}} & {rot_in[23:0], rot_in[31:24] } |
+                     {32{zks_bs2}} & {rot_in[15:0], rot_in[31:16] } |
+                     {32{zks_bs3}} & {rot_in[ 7:0], rot_in[31: 8] } ;
+    assign sm4     = rot_out ^ operand_a_i ;
+    // SM3 P0 / P1: operand_a_i XORed with two left-rotated copies of itself.
+    logic [31:0] sm3_p0, sm3_p1;
+    assign sm3_p0  = operand_a_i ^ `ROLI32(operand_a_i,  9) ^ `ROLI32(operand_a_i,17);
+    assign sm3_p1  = operand_a_i ^ `ROLI32(operand_a_i, 15) ^ `ROLI32(operand_a_i,23);
+
+    assign zks_val    =|{sm4ed_sel, sm4ks_sel, sm3p0_sel, sm3p1_sel};
+    assign zks_result = {32{sm4ed_sel}} & sm4    |
+                        {32{sm4ks_sel}} & sm4    |
+                        {32{sm3p0_sel}} & sm3_p0 |
+                        {32{sm3p1_sel}} & sm3_p1 ;
+  end else begin : no_gen_zks
+    assign zks_val    =  1'b0;
+    assign zks_result = 32'd0;
+  end
 
+  assign zk_val_o = zkn_val   || zks_val;
+  assign result_o = zkn_result | zks_result;
+
 `undef RORI32
+`undef ROLI32
 `undef SRLI32
 `undef SLLI32
 

From 6df183ac832bb90daac29a179a562ebe20195410 Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Thu, 25 Nov 2021 11:54:56 +0000
Subject: [PATCH 08/21] ZK: add the AES and SM4 S-box sources to the core file list

---
 ibex_core.core | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ibex_core.core b/ibex_core.core
index 0a21432a92..3b68d0a13c 100644
--- a/ibex_core.core
+++ b/ibex_core.core
@@ -35,6 +35,8 @@ filesets:
       - rtl/ibex_wb_stage.sv
       - rtl/ibex_dummy_instr.sv
       - rtl/ibex_zk.sv
+      - rtl/ibex_aes_sbox.sv
+      - rtl/ibex_sm4_sbox.sv
       - rtl/ibex_core.sv
     file_type: systemVerilogSource
 

From 06d9be9434d21951bcdee3ff64771453b62bef4c Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Fri, 26 Nov 2021 10:19:08 +0000
Subject: [PATCH 09/21] Zk: add support for decoding the ZBK instructions.

---
 rtl/ibex_aes_sbox.sv |   8 +++
 rtl/ibex_decoder.sv  | 122 +++++++++++++++++++++++++++++++------------
 rtl/ibex_pkg.sv      |  18 +++++++
 rtl/ibex_sm4_sbox.sv |   9 ++++
 rtl/ibex_top.sv      |   2 +-
 5 files changed, 124 insertions(+), 35 deletions(-)

diff --git a/rtl/ibex_aes_sbox.sv b/rtl/ibex_aes_sbox.sv
index 9197c3ef3a..36e23ce0ff 100644
--- a/rtl/ibex_aes_sbox.sv
+++ b/rtl/ibex_aes_sbox.sv
@@ -5,6 +5,14 @@
 
 /**
  * AES Sbox unit
+ * This modified version is based on the optimised AES structure proposed by Boyar & Peralta [BoPe12].
+ * AES S-Boxes are broken into a nonlinear middle layer and two linear top and bottom layers.
+ * The middle layer performs non-linear field inversion, which is used for both forward and inverse S-boxes.
+ *
+ * [BoPe12] Boyar J., Peralta R. "A Small Depth-16 Circuit for the AES
+ *     S-Box." Proc.SEC 2012. IFIP AICT 376. Springer, pp. 287-298 (2012)
+ *     DOI: https://doi.org/10.1007/978-3-642-30436-1_24
+ *     Preprint: https://eprint.iacr.org/2011/332.pdf
  */
 module ibex_aes_sbox (
 input  logic       fw,
diff --git a/rtl/ibex_decoder.sv b/rtl/ibex_decoder.sv
index 0d22a306a7..4e6a157b46 100644
--- a/rtl/ibex_decoder.sv
+++ b/rtl/ibex_decoder.sv
@@ -368,7 +368,9 @@ module ibex_decoder #(
               5'b0_0101,                                                              // sbseti
               5'b0_1101: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1;           // sbinvi
               5'b0_0001: if (instr[26] == 1'b0) begin
-                illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1;                    // shfl
+                if      (RV32B  == RV32BFull ) illegal_insn = 1'b0;                   // shfl
+                else if (RV32Zk != RV32ZkNone) illegal_insn = (instr[25:20] == 6'b00_1111) ? 1'b0 : 1'b1; //zip
+                else illegal_insn = 1'b1;
               end else begin
                 illegal_insn = 1'b1;
               end
@@ -412,17 +414,20 @@ module ibex_decoder #(
                 5'b0_1000: illegal_insn = (instr[26:25] == 2'b00) ? 1'b0 : 1'b1;       // srai
 
                 5'b0_0100,                                                             // sroi
-                5'b0_1100,                                                             // rori
                 5'b0_1001: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1;          // sbexti
+                5'b0_1100: illegal_insn = ((RV32B != RV32BNone)  ||
+                                           (RV32Zk!= RV32ZkNone)) ? 1'b0 : 1'b1;       // rori
+
 
                 5'b0_1101: begin
                   if ((RV32B == RV32BFull)) begin
                     illegal_insn = 1'b0;                                               // grevi
                   end else begin
                     unique case (instr[24:20])
-                      5'b11111,                                                        // rev
-                      5'b11000: illegal_insn = (RV32B == RV32BBalanced) ? 1'b0 : 1'b1; // rev8
-
+                      5'b11111: illegal_insn = ( RV32B  == RV32BBalanced ) ? 1'b0 : 1'b1; // rev
+                      5'b11000: illegal_insn = ((RV32B  == RV32BBalanced) ||
+                                                (RV32Zk != RV32ZkNone   )) ? 1'b0 : 1'b1; // rev8
+                      5'b00111: illegal_insn = ( RV32Zk != RV32ZkNone    ) ? 1'b0 : 1'b1; // brev8
                       default: illegal_insn = 1'b1;
                     endcase
                   end
@@ -438,7 +443,9 @@ module ibex_decoder #(
                 end
                 5'b0_0001: begin
                   if (instr[26] == 1'b0) begin
-                    illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1;                // unshfl
+                    if      (RV32B  == RV32BFull ) illegal_insn = 1'b0;               // unshfl
+                    else if (RV32Zk != RV32ZkNone) illegal_insn = (instr[25:20] == 6'b00_1111) ? 1'b0 : 1'b1; //unzip
+                    else illegal_insn = 1'b1;
                   end else begin
                     illegal_insn = 1'b1;
                   end
@@ -480,22 +487,23 @@ module ibex_decoder #(
             // RV32B zba
             {7'b001_0000, 3'b010}, // sh1add
             {7'b001_0000, 3'b100}, // sh2add
-            {7'b001_0000, 3'b110}, // sh3add
-            // RV32B zbb
+            {7'b001_0000, 3'b110}: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // sh3add
+            // RV32B zbb and RV32Zk
             {7'b010_0000, 3'b111}, // andn
             {7'b010_0000, 3'b110}, // orn
             {7'b010_0000, 3'b100}, // xnor
-            {7'b001_0000, 3'b001}, // slo
-            {7'b001_0000, 3'b101}, // sro
             {7'b011_0000, 3'b001}, // rol
             {7'b011_0000, 3'b101}, // ror
+            {7'b000_0100, 3'b100}, // pack
+            {7'b000_0100, 3'b111}: illegal_insn = ((RV32B  != RV32BNone ) ||
+                                                   (RV32Zk != RV32ZkNone)) ? 1'b0 : 1'b1; // packh
+            {7'b010_0100, 3'b100}, // packu
+            {7'b001_0000, 3'b001}, // slo
+            {7'b001_0000, 3'b101}, // sro
             {7'b000_0101, 3'b100}, // min
             {7'b000_0101, 3'b101}, // max
             {7'b000_0101, 3'b110}, // minu
             {7'b000_0101, 3'b111}, // maxu
-            {7'b000_0100, 3'b100}, // pack
-            {7'b010_0100, 3'b100}, // packu
-            {7'b000_0100, 3'b111}, // packh
             // RV32B zbs
             {7'b010_0100, 3'b001}, // sbclr
             {7'b001_0100, 3'b001}, // sbset
@@ -510,11 +518,17 @@ module ibex_decoder #(
             {7'b011_0100, 3'b101}, // grev
             {7'b001_0100, 3'b101}, // gorc
             {7'b000_0100, 3'b001}, // shfl
-            {7'b000_0100, 3'b101}, // unshfl
-            // RV32B zbc
+            {7'b000_0100, 3'b101}: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // unshfl
+            // RV32B zbc and RV32Zk
             {7'b000_0101, 3'b001}, // clmul
-            {7'b000_0101, 3'b010}, // clmulr
-            {7'b000_0101, 3'b011}: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // clmulh
+            {7'b000_0101, 3'b011}: illegal_insn = ((RV32B  == RV32BFull ) ||
+                                                   (RV32Zk != RV32ZkNone)) ? 1'b0 : 1'b1; // clmulh
+            // RV32B zbc
+            {7'b000_0101, 3'b010}: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1;     // clmulr
+
+            // RV32Zk zbk
+            {7'b001_0100, 3'b100}, // xperm8
+            {7'b001_0100, 3'b010}: illegal_insn = (RV32Zk != RV32ZkNone) ? 1'b0 : 1'b1; // xperm4
 
             // RV32Zk zkh
             {7'b010_1000, 3'b000}, // sha512_sum0r
@@ -849,8 +863,11 @@ module ibex_decoder #(
               5'b0_1001: if (RV32B != RV32BNone) alu_operator_o = ALU_SBCLR;  // Clear bit specified by immediate
               5'b0_0101: if (RV32B != RV32BNone) alu_operator_o = ALU_SBSET;  // Set bit specified by immediate
               5'b0_1101: if (RV32B != RV32BNone) alu_operator_o = ALU_SBINV;  // Invert bit specified by immediate.
-              // Shuffle with Immediate Control Value
-              5'b0_0001: if (instr_alu[26] == 0) alu_operator_o = (RV32B == RV32BFull) ? ALU_SHFL : ALU_SLL;
+              5'b0_0001: if (instr_alu[26] == 0) begin                       // shfl-immediate encoding space
+                 if (RV32B == RV32BFull)        alu_operator_o = ALU_SHFL;    // Shuffle with Immediate Control Value
+                 else if (RV32Zk != RV32ZkNone) alu_operator_o = (instr_alu[25:20] == 6'b00_1111) ? ZKB_ZIP : ALU_SLL; // zbkb zip; read instr_alu like the rest of this decode
+                 else                           alu_operator_o = ALU_SLL;     // neither extension: keep the default shift operator
+              end
               5'b0_1100: begin
                 unique case (instr_alu[26:20])
                   7'b000_0000: if (RV32B != RV32BNone) alu_operator_o = ALU_CLZ  ; // clz
@@ -931,13 +948,22 @@ module ibex_decoder #(
                   default: ;
                 endcase
               end
-
             end else begin
-              if (instr_alu[31:27] == 5'b0_0000) begin
-                alu_operator_o = ALU_SRL;               // Shift Right Logical by Immediate
-              end else if (instr_alu[31:27] == 5'b0_1000) begin
-                alu_operator_o = ALU_SRA;               // Shift Right Arithmetically by Immediate
-              end
+              unique case (instr_alu[31:27])
+                5'b0_0000: alu_operator_o = ALU_SRL;   // Shift Right Logical by Immediate
+                5'b0_1000: alu_operator_o = ALU_SRA;   // Shift Right Arithmetically by Immediate
+                5'b0_1100: if (RV32Zk != RV32ZkNone) begin
+                  if (instr_alu[26]    == 1'b0) alu_operator_o = ZKB_RORI;         // zbkb_rori
+                end
+                5'b0_1101: if (RV32Zk != RV32ZkNone) begin
+                  if (instr_alu[26:20] == 7'b000_0111) alu_operator_o = ZKB_BREV8; // zbkb_brev8
+                  if (instr_alu[26:20] == 7'b001_1000) alu_operator_o = ZKB_REV8;  // zbkb_rev8
+                end
+                5'b0_0001: if (RV32Zk != RV32ZkNone) begin
+                  if (instr_alu[26:20] == 7'b000_1111) alu_operator_o = ZKB_UNZIP; // zbkb_unzip
+                end
+                default: ;
+              endcase
             end
           end
 
@@ -1011,13 +1037,17 @@ module ibex_decoder #(
             {7'b011_0000, 3'b001}: begin
               if (RV32B != RV32BNone) begin
                 alu_operator_o = ALU_ROL;   // rol
-                alu_multicycle_o = 1'b1;
+                alu_multicycle_o = 1'b1;                
+              end else if ((RV32Zk != RV32ZkNone)) begin
+                alu_operator_o = ZKB_ROL;   // zbk_rol
               end
             end
             {7'b011_0000, 3'b101}: begin
               if (RV32B != RV32BNone) begin
                 alu_operator_o = ALU_ROR;   // ror
                 alu_multicycle_o = 1'b1;
+              end else if (RV32Zk != RV32ZkNone) begin
+                alu_operator_o = ZKB_ROR;   // zbk_ror
               end
             end
 
@@ -1026,14 +1056,28 @@ module ibex_decoder #(
             {7'b000_0101, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_MINU;   // minu
             {7'b000_0101, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_MAXU;   // maxu
 
-            {7'b000_0100, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACK;   // pack
+            {7'b000_0100, 3'b100}: begin
+              if      (RV32B  != RV32BNone ) alu_operator_o = ALU_PACK;   // pack
+              else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_PACK;   // pack
+            end
             {7'b010_0100, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACKU;  // packu
-            {7'b000_0100, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACKH;  // packh
-
-            {7'b010_0000, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_XNOR;   // xnor
-            {7'b010_0000, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_ORN;    // orn
-            {7'b010_0000, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_ANDN;   // andn
+            {7'b000_0100, 3'b111}: begin // packh: the RV32B operator takes priority, Zk is the fallback
+              if      (RV32B  != RV32BNone ) alu_operator_o = ALU_PACKH;  // packh
+              else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_PACKH;  // packh
+            end
 
+            {7'b010_0000, 3'b100}: begin
+              if      (RV32B  != RV32BNone ) alu_operator_o = ALU_XNOR;   // xnor
+              else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_XNOR;   // xnor
+            end
+            {7'b010_0000, 3'b110}: begin
+              if      (RV32B  != RV32BNone ) alu_operator_o = ALU_ORN;    // orn
+              else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_ORN;    // orn
+            end
+            {7'b010_0000, 3'b111}: begin
+              if      (RV32B  != RV32BNone ) alu_operator_o = ALU_ANDN;   // andn
+              else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_ANDN;   // andn
+            end
             // RV32B zba
             {7'b001_0000, 3'b010}: if (RV32B != RV32BNone) alu_operator_o = ALU_SH1ADD; // sh1add
             {7'b001_0000, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_SH2ADD; // sh2add
@@ -1055,9 +1099,15 @@ module ibex_decoder #(
             {7'b000_0100, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_UNSHFL; // unshfl
 
             // RV32B zbc
-            {7'b000_0101, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_CLMUL;  // clmul
+            {7'b000_0101, 3'b001}: begin
+              if      (RV32B  == RV32BFull ) alu_operator_o = ALU_CLMUL;  // clmul
+              else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_CLMUL;  // clmul
+            end
             {7'b000_0101, 3'b010}: if (RV32B == RV32BFull) alu_operator_o = ALU_CLMULR; // clmulr
-            {7'b000_0101, 3'b011}: if (RV32B == RV32BFull) alu_operator_o = ALU_CLMULH; // clmulh
+            {7'b000_0101, 3'b011}: begin
+              if      (RV32B  == RV32BFull ) alu_operator_o = ALU_CLMULH; // clmulh
+              else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_CLMULH; // clmulh
+            end
 
             // RV32B zbe
             {7'b010_0100, 3'b110}: begin
@@ -1073,6 +1123,10 @@ module ibex_decoder #(
               end
             end
 
+            // RV32Zk zbk
+            {7'b001_0100, 3'b100}: if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_XPERM8; // xperm8
+            {7'b001_0100, 3'b010}: if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_XPERM4; // xperm4
+
             // RV32Zk zkh
             {7'b010_1000, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SUM0R; // sha512_sum0r
             {7'b010_1001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SUM1R; // sha512_sum1r
diff --git a/rtl/ibex_pkg.sv b/rtl/ibex_pkg.sv
index 180bba0117..96f1816d24 100644
--- a/rtl/ibex_pkg.sv
+++ b/rtl/ibex_pkg.sv
@@ -185,6 +185,24 @@ package ibex_pkg;
     ALU_CRC32_W,
     ALU_CRC32C_W,
 
+    // Zbkb
+    ZKB_RORI,
+    ZKB_BREV8,
+    ZKB_REV8,
+    ZKB_ZIP,
+    ZKB_UNZIP,
+    ZKB_ROR,
+    ZKB_ROL,
+    ZKB_ANDN,
+    ZKB_ORN,
+    ZKB_XNOR,
+    ZKB_PACK,
+    ZKB_PACKH,
+    ZKB_CLMUL,
+    ZKB_CLMULH,
+    ZKB_XPERM8,
+    ZKB_XPERM4,
+
     // Zkn
     ZKN_SHA256SUM0,
     ZKN_SHA256SUM1,
diff --git a/rtl/ibex_sm4_sbox.sv b/rtl/ibex_sm4_sbox.sv
index ebe5b1661b..bd814f3d85 100644
--- a/rtl/ibex_sm4_sbox.sv
+++ b/rtl/ibex_sm4_sbox.sv
@@ -5,6 +5,15 @@
 
 /**
  * SM4 Sbox unit
+ * This modified version is derived from the original implementation by Markku-Juhani O. Saarinen,
+ * which is based on the optimised AES structure proposed by Boyar & Peralta [BoPe12].
+ * S-Boxes are broken into a nonlinear middle layer and two linear top and bottom layers.
+ * The linear top and bottom layers are modified to adapt them to the SM4 cipher.
+ *
+ * [BoPe12] Boyar J., Peralta R. "A Small Depth-16 Circuit for the AES
+ *     S-Box." Proc.SEC 2012. IFIP AICT 376. Springer, pp. 287-298 (2012)
+ *     DOI: https://doi.org/10.1007/978-3-642-30436-1_24
+ *     Preprint: https://eprint.iacr.org/2011/332.pdf
  */
 module ibex_sm4_sbox (
 input  logic [7:0] in,
diff --git a/rtl/ibex_top.sv b/rtl/ibex_top.sv
index 0ab264e5d1..d58086d4cf 100644
--- a/rtl/ibex_top.sv
+++ b/rtl/ibex_top.sv
@@ -21,7 +21,7 @@ module ibex_top import ibex_pkg::*; #(
   parameter bit          RV32E            = 1'b0,
   parameter rv32m_e      RV32M            = RV32MFast,
   parameter rv32b_e      RV32B            = RV32BNone,
-  parameter rv32zk_e     RV32Zk           = RV32Zks,
+  parameter rv32zk_e     RV32Zk           = RV32Zkn,
   parameter regfile_e    RegFile          = RegFileFF,
   parameter bit          BranchTargetALU  = 1'b0,
   parameter bit          WritebackStage   = 1'b0,

From 887d877fd86c4ca9bfba7b4c6259199a43340109 Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Fri, 26 Nov 2021 15:33:12 +0000
Subject: [PATCH 10/21] Zk: add the implementation of the Zbk(Zbkb, Zbkc, Zbkx)
 instructions.

---
 ibex_core.core         |   1 +
 rtl/ibex_poly16_mul.sv | 394 +++++++++++++++++++++++++++++++++++++++++
 rtl/ibex_zk.sv         | 190 +++++++++++++++++++-
 3 files changed, 582 insertions(+), 3 deletions(-)
 create mode 100644 rtl/ibex_poly16_mul.sv

diff --git a/ibex_core.core b/ibex_core.core
index 3b68d0a13c..82dab3a98c 100644
--- a/ibex_core.core
+++ b/ibex_core.core
@@ -37,6 +37,7 @@ filesets:
       - rtl/ibex_zk.sv
       - rtl/ibex_aes_sbox.sv
       - rtl/ibex_sm4_sbox.sv
+      - rtl/ibex_poly16_mul.sv
       - rtl/ibex_core.sv
     file_type: systemVerilogSource
 
diff --git a/rtl/ibex_poly16_mul.sv b/rtl/ibex_poly16_mul.sv
new file mode 100644
index 0000000000..b8b8b74e9d
--- /dev/null
+++ b/rtl/ibex_poly16_mul.sv
@@ -0,0 +1,394 @@
+// Copyright lowRISC contributors.
+// Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * 16-bit Polynomial Multiplier unit
+ * The implementation follows the circuit optimisation introduced by the NIST circuit complexity team [1]
+ *
+ * [1] https://github.com/usnistgov/Circuits/
+ */
+module ibex_poly16_mul ( // purely combinational AND/XOR network; no clock, reset or state
+input  logic [15:0] a, // first 16-bit operand
+input  logic [15:0] b, // second 16-bit operand
+output logic [31:0] r  // result: {1'b0, z30..z0}, bit 31 is tied to zero
+);
+
+  logic       t1,   t2,   t3,   t4,   t5,   t6,   t7,   t8,   t9,   t10,  t11,  t12,  t13,  t14,  t15,  t16,  t17,  t18,  t19; // t1..t318: intermediate nets
+  logic t20,  t21,  t22,  t23,  t24,  t25,  t26,  t27,  t28,  t29,  t30,  t31,  t32,  t33,  t34,  t35,  t36,  t37,  t38,  t39;
+  logic t40,  t41,  t42,  t43,  t44,  t45,  t46,  t47,  t48,  t49,  t50,  t51,  t52,  t53,  t54,  t55,  t56,  t57,  t58,  t59;
+  logic t60,  t61,  t62,  t63,  t64,  t65,  t66,  t67,  t68,  t69,  t70,  t71,  t72,  t73,  t74,  t75,  t76,  t77,  t78,  t79;
+  logic t80,  t81,  t82,  t83,  t84,  t85,  t86,  t87,  t88,  t89,  t90,  t91,  t92,  t93,  t94,  t95,  t96,  t97,  t98,  t99;
+
+  logic t100, t101, t102, t103, t104, t105, t106, t107, t108, t109, t110, t111, t112, t113, t114, t115, t116, t117, t118, t119;
+  logic t120, t121, t122, t123, t124, t125, t126, t127, t128, t129, t130, t131, t132, t133, t134, t135, t136, t137, t138, t139;
+  logic t140, t141, t142, t143, t144, t145, t146, t147, t148, t149, t150, t151, t152, t153, t154, t155, t156, t157, t158, t159;
+  logic t160, t161, t162, t163, t164, t165, t166, t167, t168, t169, t170, t171, t172, t173, t174, t175, t176, t177, t178, t179;
+  logic t180, t181, t182, t183, t184, t185, t186, t187, t188, t189, t190, t191, t192, t193, t194, t195, t196, t197, t198, t199;
+
+  logic t200, t201, t202, t203, t204, t205, t206, t207, t208, t209, t210, t211, t212, t213, t214, t215, t216, t217, t218, t219;
+  logic t220, t221, t222, t223, t224, t225, t226, t227, t228, t229, t230, t231, t232, t233, t234, t235, t236, t237, t238, t239;
+  logic t240, t241, t242, t243, t244, t245, t246, t247, t248, t249, t250, t251, t252, t253, t254, t255, t256, t257, t258, t259;
+  logic t260, t261, t262, t263, t264, t265, t266, t267, t268, t269, t270, t271, t272, t273, t274, t275, t276, t277, t278, t279;
+  logic t280, t281, t282, t283, t284, t285, t286, t287, t288, t289, t290, t291, t292, t293, t294, t295, t296, t297, t298, t299;
+
+  logic t300, t301, t302, t303, t304, t305, t306, t307, t308, t309, t310, t311, t312, t313, t314, t315, t316, t317, t318;
+
+  logic z0,  z1,  z2,  z3,  z4,  z5,  z6,  z7,  z8,  z9; // z0..z30: result bits, concatenated into r at the end
+  logic z10, z11, z12, z13, z14, z15, z16, z17, z18, z19;
+  logic z20, z21, z22, z23, z24, z25, z26, z27, z28, z29, z30;
+
+  assign z30 = a[15] & b[15]; // z30, t1..t62 and z0: single-bit ANDs pairing a and b bits from the same 4-bit group
+  assign t1  = a[15] & b[12];
+  assign t2  = a[15] & b[13];
+  assign t3  = a[15] & b[14];
+  assign t4  = a[12] & b[15];
+  assign t5  = a[13] & b[15];
+  assign t6  = a[14] & b[15];
+  assign t7  = a[14] & b[14];
+  assign t8  = a[14] & b[12];
+  assign t9  = a[14] & b[13];
+  assign t10 = a[12] & b[14];
+  assign t11 = a[13] & b[14];
+  assign t12 = a[13] & b[13];
+  assign t13 = a[13] & b[12];
+  assign t14 = a[12] & b[13];
+  assign t15 = a[12] & b[12];
+  assign t16 = a[11] & b[11];
+  assign t17 = a[11] & b[ 8];
+  assign t18 = a[11] & b[ 9];
+  assign t19 = a[11] & b[10];
+  assign t20 = a[ 8] & b[11];
+  assign t21 = a[ 9] & b[11];
+  assign t22 = a[10] & b[11];
+  assign t23 = a[10] & b[10];
+  assign t24 = a[10] & b[ 8];
+  assign t25 = a[10] & b[ 9];
+  assign t26 = a[ 8] & b[10];
+  assign t27 = a[ 9] & b[10];
+  assign t28 = a[ 9] & b[ 9];
+  assign t29 = a[ 9] & b[ 8];
+  assign t30 = a[ 8] & b[ 9];
+  assign t31 = a[ 8] & b[ 8];
+  assign t32 = a[ 7] & b[ 7];
+  assign t33 = a[ 7] & b[ 4];
+  assign t34 = a[ 7] & b[ 5];
+  assign t35 = a[ 7] & b[ 6];
+  assign t36 = a[ 4] & b[ 7];
+  assign t37 = a[ 5] & b[ 7];
+  assign t38 = a[ 6] & b[ 7];
+  assign t39 = a[ 6] & b[ 6];
+  assign t40 = a[ 6] & b[ 4];
+  assign t41 = a[ 6] & b[ 5];
+  assign t42 = a[ 4] & b[ 6];
+  assign t43 = a[ 5] & b[ 6];
+  assign t44 = a[ 5] & b[ 5];
+  assign t45 = a[ 5] & b[ 4];
+  assign t46 = a[ 4] & b[ 5];
+  assign t47 = a[ 4] & b[ 4];
+  assign t48 = a[ 3] & b[ 3];
+  assign t49 = a[ 3] & b[ 0];
+  assign t50 = a[ 3] & b[ 1];
+  assign t51 = a[ 3] & b[ 2];
+  assign t52 = a[ 0] & b[ 3];
+  assign t53 = a[ 1] & b[ 3];
+  assign t54 = a[ 2] & b[ 3];
+  assign t55 = a[ 2] & b[ 2];
+  assign t56 = a[ 2] & b[ 0];
+  assign t57 = a[ 2] & b[ 1];
+  assign t58 = a[ 0] & b[ 2];
+  assign t59 = a[ 1] & b[ 2];
+  assign t60 = a[ 1] & b[ 1];
+  assign t61 = a[ 1] & b[ 0];
+  assign t62 = a[ 0] & b[ 1];
+  assign  z0 = a[ 0] & b[ 0];
+  assign t63 = b[ 8] ^ b[12]; // t63..t70: bits [11:8] XOR bits [15:12], first for b, then for a
+  assign t64 = b[ 9] ^ b[13];
+  assign t65 = b[10] ^ b[14];
+  assign t66 = b[11] ^ b[15];
+  assign t67 = a[ 8] ^ a[12];
+  assign t68 = a[ 9] ^ a[13];
+  assign t69 = a[10] ^ a[14];
+  assign t70 = a[11] ^ a[15];
+  assign t71 = t70 & t66; // t71..t86: ANDs of the a-side sums t67..t70 with the b-side sums t63..t66
+  assign t72 = t70 & t63;
+  assign t73 = t70 & t64;
+  assign t74 = t70 & t65;
+  assign t75 = t67 & t66;
+  assign t76 = t68 & t66;
+  assign t77 = t69 & t66;
+  assign t78 = t69 & t65;
+  assign t79 = t69 & t63;
+  assign t80 = t69 & t64;
+  assign t81 = t67 & t65;
+  assign t82 = t68 & t65;
+  assign t83 = t68 & t64;
+  assign t84 = t68 & t63;
+  assign t85 = t67 & t64;
+  assign t86 = t67 & t63;
+  assign t87 = b[0] ^ b[ 4]; // t87..t94: bits [3:0] XOR bits [7:4], first for b, then for a
+  assign t88 = b[1] ^ b[ 5];
+  assign t89 = b[2] ^ b[ 6];
+  assign t90 = b[3] ^ b[ 7];
+  assign t91 = a[0] ^ a[ 4];
+  assign t92 = a[1] ^ a[ 5];
+  assign t93 = a[2] ^ a[ 6];
+  assign t94 = a[3] ^ a[ 7];
+  assign t95 = t94  & t90; // t95..t110: ANDs of the a-side sums t91..t94 with the b-side sums t87..t90
+  assign t96 = t94  & t87;
+  assign t97 = t94  & t88;
+  assign t98 = t94  & t89;
+  assign t99 = t91  & t90;
+
+  assign t100 = t92 & t90;
+  assign t101 = t93 & t90;
+  assign t102 = t93 & t89;
+  assign t103 = t93 & t87;
+  assign t104 = t93 & t88;
+  assign t105 = t91 & t89;
+  assign t106 = t92 & t89;
+  assign t107 = t92 & t88;
+  assign t108 = t92 & t87;
+  assign t109 = t91 & t88;
+  assign t110 = t91 & t87;
+  assign t111 = b[4] ^ b[12]; // t111..t126: XOR of bits eight positions apart, first for b, then for a
+  assign t112 = b[5] ^ b[13];
+  assign t113 = b[6] ^ b[14];
+  assign t114 = b[7] ^ b[15];
+  assign t115 = b[0] ^ b[ 8];
+  assign t116 = b[1] ^ b[ 9];
+  assign t117 = b[2] ^ b[10];
+  assign t118 = b[3] ^ b[11];
+  assign t119 = a[4] ^ a[12];
+  assign t120 = a[5] ^ a[13];
+  assign t121 = a[6] ^ a[14];
+  assign t122 = a[7] ^ a[15];
+  assign t123 = a[0] ^ a[ 8];
+  assign t124 = a[1] ^ a[ 9];
+  assign t125 = a[2] ^ a[10];
+  assign t126 = a[3] ^ a[11];
+  assign t127 = t126 & t118; // t127..t142: ANDs of t123..t126 with t115..t118
+  assign t128 = t126 & t115;
+  assign t129 = t126 & t116;
+  assign t130 = t126 & t117;
+  assign t131 = t123 & t118;
+  assign t132 = t124 & t118;
+  assign t133 = t125 & t118;
+  assign t134 = t125 & t117;
+  assign t135 = t125 & t115;
+  assign t136 = t125 & t116;
+  assign t137 = t123 & t117;
+  assign t138 = t124 & t117;
+  assign t139 = t124 & t116;
+  assign t140 = t124 & t115;
+  assign t141 = t123 & t116;
+  assign t142 = t123 & t115;
+  assign t143 = t122 & t114; // t143..t158: ANDs of t119..t122 with t111..t114
+  assign t144 = t122 & t111;
+  assign t145 = t122 & t112;
+  assign t146 = t122 & t113;
+  assign t147 = t119 & t114;
+  assign t148 = t120 & t114;
+  assign t149 = t121 & t114;
+  assign t150 = t121 & t113;
+  assign t151 = t121 & t111;
+  assign t152 = t121 & t112;
+  assign t153 = t119 & t113;
+  assign t154 = t120 & t113;
+  assign t155 = t120 & t112;
+  assign t156 = t120 & t111;
+  assign t157 = t119 & t112;
+  assign t158 = t119 & t111;
+  assign t159 = t115 ^ t111; // t159..t166: XOR of the two groups of eight-apart sums (b-side, then a-side)
+  assign t160 = t116 ^ t112;
+  assign t161 = t117 ^ t113;
+  assign t162 = t118 ^ t114;
+  assign t163 = t123 ^ t119;
+  assign t164 = t124 ^ t120;
+  assign t165 = t125 ^ t121;
+  assign t166 = t126 ^ t122;
+  assign t167 = t166 & t162; // t167..t182: ANDs of t163..t166 with t159..t162
+  assign t168 = t166 & t159;
+  assign t169 = t166 & t160;
+  assign t170 = t166 & t161;
+  assign t171 = t163 & t162;
+  assign t172 = t164 & t162;
+  assign t173 = t165 & t162;
+  assign t174 = t165 & t161;
+  assign t175 = t165 & t159;
+  assign t176 = t165 & t160;
+  assign t177 = t163 & t161;
+  assign t178 = t164 & t161;
+  assign t179 = t164 & t160;
+  assign t180 = t164 & t159;
+  assign t181 = t163 & t160;
+  assign t182 = t163 & t159;
+  assign t183 = t73  ^ t76; // from here to z15: XOR-only network folding the AND terms into z1..z29
+  assign t184 = t97  ^ t100; // NOTE(review): grouping resembles a nested Karatsuba split - confirm against the NIST reference
+  assign t185 = t15  ^ t18;
+  assign t186 = t129 ^ t132;
+  assign t187 = t134 ^ t158;
+  assign t188 = t145 ^ t148;
+  assign t189 = t169 ^ t172;
+  assign t190 = t2   ^ t5;
+  assign t191 = t21  ^ t23;
+  assign t192 = t31  ^ t34;
+  assign t193 = t37  ^ t39;
+  assign t194 = t47  ^ t50;
+  assign t195 = t53  ^ t55;
+  assign t196 = t183 ^ t78;
+  assign t197 = t192 ^ t193;
+  assign t198 = t194 ^ t195;
+  assign t199 = t184 ^ t102;
+  assign t200 = t185 ^ t191;
+  assign t201 = t186 ^ t187;
+  assign t202 = t188 ^ t150;
+  assign t203 = t189 ^ t174;
+  assign  z28 = t190 ^ t7;
+  assign t204 = t198 ^   z0;
+  assign   z4 = t110 ^ t204;
+  assign t205 = t200 ^  z28;
+  assign  z24 = t196 ^ t205;
+  assign t206 = t197 ^ t199;
+  assign t207 = t197 ^ t86;
+  assign t208 = t202 ^ t205;
+  assign  z20 = t207 ^ t208;
+  assign t209 = t142 ^ t204;
+  assign   z8 = t206 ^ t209;
+  assign t210 = t196 ^ t198;
+  assign t211 = t201 ^ t206;
+  assign t212 = t208 ^ t210;
+  assign t213 = t211 ^ t212;
+  assign t214 = t200 ^ t201;
+  assign t215 = t110 ^ t182;
+  assign t216 = t209 ^ t214;
+  assign t217 = t215 ^ t207;
+  assign  z12 = t217 ^ t216;
+  assign  z16 = t213 ^ t203;
+  assign t218 = t74  ^ t77;
+  assign t219 = t84  ^ t85;
+  assign t220 = t13  ^ t14;
+  assign t221 = t98  ^ t101;
+  assign t222 = t108 ^ t109;
+  assign t223 = t130 ^ t133;
+  assign t224 = t140 ^ t141;
+  assign t225 = t146 ^ t149;
+  assign t226 = t156 ^ t157;
+  assign t227 = t170 ^ t173;
+  assign t228 = t19  ^ t22;
+  assign t229 = t180 ^ t181;
+  assign t230 = t29  ^ t30;
+  assign  z29 = t3   ^ t6;
+  assign t231 = t35  ^ t38;
+  assign t232 = t45  ^ t46;
+  assign t233 = t51  ^ t54;
+  assign   z1 = t61  ^ t62;
+  assign t234 = t228 ^ t220;
+  assign t235 = t230 ^ t231;
+  assign t236 = t232 ^ t233;
+  assign t237 = t223 ^ t226;
+  assign t238 =  z29 ^ t234;
+  assign  z25 = t218 ^ t238;
+  assign t239 =   z1 ^ t236;
+  assign   z5 = t222 ^ t239;
+  assign t240 = t219 ^ t235;
+  assign t241 = t235 ^ t221;
+  assign t242 = t224 ^ t239;
+  assign   z9 = t241 ^ t242;
+  assign t243 = t225 ^ t238;
+  assign  z21 = t240 ^ t243;
+  assign t244 = t218 ^ t236;
+  assign t245 = t237 ^ t241;
+  assign t246 = t243 ^ t244;
+  assign t247 = t245 ^ t227;
+  assign t248 = t234 ^ t237;
+  assign t249 = t222 ^ t240;
+  assign t250 = t242 ^ t248;
+  assign t251 = t249 ^ t229;
+  assign  z17 = t247 ^ t246;
+  assign  z13 = t251 ^ t250;
+  assign t252 = t10  ^ t12;
+  assign t253 = t79  ^ t81;
+  assign t254 = t103 ^ t105;
+  assign t255 = t127 ^ t151;
+  assign t256 = t135 ^ t137;
+  assign t257 = t153 ^ t155;
+  assign t258 = t16  ^ t8;
+  assign t259 = t175 ^ t177;
+  assign t260 = t24  ^ t26;
+  assign t261 = t28  ^ t32;
+  assign t262 = t40  ^ t42;
+  assign t263 = t44  ^ t48;
+  assign t264 = t56  ^ t58;
+  assign t265 = t252 ^ t258;
+  assign t266 = t261 ^ t260;
+  assign t267 = t262 ^ t263;
+  assign   z2 = t264 ^ t60;
+  assign t268 = t253 ^ t83;
+  assign t269 = t254 ^ t107;
+  assign t270 = t255 ^ t257;
+  assign t271 = t256 ^ t139;
+  assign t272 = t259 ^ t179;
+  assign t273 = t265 ^  z30;
+  assign  z26 = t71  ^ t273;
+  assign t274 = t267 ^   z2;
+  assign   z6 = t269 ^ t274;
+  assign t275 = t266 ^ t268;
+  assign t276 = t266 ^ t95 ;
+  assign t277 = t271 ^ t274;
+  assign  z10 = t276 ^ t277;
+  assign t278 = t143 ^ t273;
+  assign  z22 = t275 ^ t278;
+  assign t279 = t265 ^ t269;
+  assign t280 = t270 ^ t275;
+  assign t281 = t277 ^ t279;
+  assign t282 = t280 ^ t281;
+  assign t283 = t267 ^ t270;
+  assign t284 = t71  ^ t167;
+  assign t285 = t278 ^ t283;
+  assign t286 = t284 ^ t276;
+  assign  z14 = t282 ^ t272;
+  assign  z18 = t286 ^ t285;
+  assign t287 = t9   ^ t11;
+  assign t288 = t72  ^ t75;
+  assign t289 = t80  ^ t82;
+  assign t290 = t96  ^ t99;
+  assign t291 = t104 ^ t106;
+  assign t292 = t1   ^ t4;
+  assign t293 = t128 ^ t131;
+  assign t294 = t136 ^ t138;
+  assign t295 = t144 ^ t147;
+  assign t296 = t152 ^ t154;
+  assign t297 = t17  ^ t20;
+  assign t298 = t168 ^ t171;
+  assign t299 = t176 ^ t178;
+  assign t300 = t25  ^ t27;
+  assign t301 = t33  ^ t36;
+  assign t302 = t41  ^ t43;
+  assign t303 = t49  ^ t52;
+  assign t304 = t57  ^ t59;
+  assign  z27 = t287 ^ t292;
+  assign t305 = t296 ^ t295;
+  assign t306 = t297 ^ t300;
+  assign t307 = t298 ^ t299;
+  assign t308 = t301 ^ t302;
+  assign   z3 = t303 ^ t304;
+  assign t309 = t288 ^ t289;
+  assign t310 = t290 ^ t291;
+  assign t311 = t293 ^ t294;
+  assign t312 =  z27 ^ t306;
+  assign  z23 = t309 ^ t312;
+  assign t313 = t308 ^   z3;
+  assign   z7 = t310 ^ t313;
+  assign t314 = t305 ^ t308;
+  assign  z19 = t312 ^ t314;
+  assign t315 = t306 ^ t311;
+  assign  z11 = t313 ^ t315;
+  assign t316 = t305 ^ t311;
+  assign t317 =  z23 ^   z7;
+  assign t318 = t316 ^ t307;
+  assign  z15 = t318 ^ t317;
+
+  assign r = {1'b0,z30,z29,z28,z27,z26,z25,z24,z23,z22,z21,z20,z19,z18,z17,z16,z15,z14,z13,z12,z11,z10,z9,z8,z7,z6,z5,z4,z3,z2,z1,z0}; // 31 result bits, MSB padded with 0
+endmodule
diff --git a/rtl/ibex_zk.sv b/rtl/ibex_zk.sv
index e5d3029e84..557c4a23e4 100644
--- a/rtl/ibex_zk.sv
+++ b/rtl/ibex_zk.sv
@@ -4,7 +4,7 @@
 // SPDX-License-Identifier: Apache-2.0
 
 /**
- * Zk Extension unit
+ * Zk Extension unit: An implementation of the RISC-V Cryptography Extension.
  */
 module ibex_zk #(
   parameter ibex_pkg::rv32zk_e RV32Zk = ibex_pkg::RV32ZkNone
@@ -22,6 +22,59 @@ module ibex_zk #(
 `define SRLI32(a,b) ((a >> b)              )
 `define SLLI32(a,b) ((a << b)              )
 
+// 32-bit barrel rotate-right: returns x rotated right by amt (five mux stages: 1, 2, 4, 8, 16).
+function automatic logic [31:0] ror32(logic [31:0] x, logic [4:0] amt);
+    logic [31:0] ro, l8, l4, l2, l1, l0;
+    l0 = x; // blocking assignments: 'assign' must not write automatic function variables
+    l1 = ({32{amt[0]}} & {l0[   0], l0[31: 1]}) | ({32{!amt[0]}} & l0[31:0]); // rotate by 1 if amt[0]
+    l2 = ({32{amt[1]}} & {l1[ 1:0], l1[31: 2]}) | ({32{!amt[1]}} & l1[31:0]); // rotate by 2 if amt[1]
+    l4 = ({32{amt[2]}} & {l2[ 3:0], l2[31: 4]}) | ({32{!amt[2]}} & l2[31:0]); // rotate by 4 if amt[2]
+    l8 = ({32{amt[3]}} & {l4[ 7:0], l4[31: 8]}) | ({32{!amt[3]}} & l4[31:0]); // rotate by 8 if amt[3]
+    ro = ({32{amt[4]}} & {l8[15:0], l8[31:16]}) | ({32{!amt[4]}} & l8[31:0]); // rotate by 16 if amt[4]
+    return ro;
+endfunction
+
+// 32-bit barrel rotate-left: returns x rotated left by amt (five mux stages: 1, 2, 4, 8, 16).
+function automatic logic [31:0] rol32(logic [31:0] x, logic [4:0] amt);
+    logic [31:0] ro, l8, l4, l2, l1, l0;
+    l0 = x; // blocking assignments: 'assign' must not write automatic function variables
+    l1 = ({32{amt[0]}} & {l0[30:0], l0[31   ]}) | ({32{!amt[0]}} & l0[31:0]); // rotate by 1 if amt[0]
+    l2 = ({32{amt[1]}} & {l1[29:0], l1[31:30]}) | ({32{!amt[1]}} & l1[31:0]); // rotate by 2 if amt[1]
+    l4 = ({32{amt[2]}} & {l2[27:0], l2[31:28]}) | ({32{!amt[2]}} & l2[31:0]); // rotate by 4 if amt[2]
+    l8 = ({32{amt[3]}} & {l4[23:0], l4[31:24]}) | ({32{!amt[3]}} & l4[31:0]); // rotate by 8 if amt[3]
+    ro = ({32{amt[4]}} & {l8[15:0], l8[31:16]}) | ({32{!amt[4]}} & l8[31:0]); // rotate by 16 if amt[4]
+    return ro;
+endfunction
+
+// Reverse the bit order within one byte: result bit i is input bit 7-i.
+function automatic logic [7:0] rev8(logic [7:0] x);
+    logic [7:0]  rb;
+    for (int i = 0;  i < 8; i = i + 1) begin
+        rb[i] = x[8-i-1]; // blocking assignment: 'assign' must not write automatic function variables
+    end
+    return rb;
+endfunction
+
+// 32-bit zip (Zbkb): interleave the two halves of x, result[2i] = x[i], result[2i+1] = x[i+16].
+function automatic logic [31:0] zip32(logic [31:0] x);
+    logic [31:0] zr;
+    for (int i = 0;  i < 16; i = i + 1) begin
+        zr[2*i  ] = x[i];    // even result bits come from the low half
+        zr[2*i+1] = x[i+16]; // odd result bits come from the high half
+    end
+    return zr; // NOTE(review): call site not visible here - confirm it expects the Zbkb zip permutation
+endfunction
+
+// 32-bit unzip (Zbkb): de-interleave x, result[i] = x[2i], result[i+16] = x[2i+1].
+function automatic logic [31:0] unzip32(logic [31:0] x);
+    logic [31:0] uz;
+    for (int i = 0;  i < 16; i = i + 1) begin
+        uz[i   ] = x[2*i    ]; // low half collects the even bits
+        uz[i+16] = x[2*i + 1]; // high half collects the odd bits
+    end
+    return uz; // NOTE(review): call site not visible here - confirm it expects the Zbkb unzip permutation
+endfunction
+
 // Multiply by 2 in GF(2^8) modulo 8'h1b
 function automatic logic [7:0] xtime2(logic [7:0] a);
     logic [7:0] xtime2;
@@ -39,6 +92,137 @@ function automatic logic [7:0] xtimeN(logic [7:0] a, logic [3:0] b);
     return xtimeN;
 endfunction
 
+  logic        zkb_val;
+  logic [31:0] zkb_result;
+  if (RV32Zk != RV32ZkNone) begin : gen_zkb
+    logic ror_sel, rol_sel, rori_sel, andn_sel, orn_sel, xnor_sel;
+    logic pack_sel, packh_sel, brev8_sel, rev8_sel, zip_sel, unzip_sel;
+    logic clmull_sel, clmulh_sel, xperm8_sel, xperm4_sel;
+    assign    ror_sel = (operator_i == ZKB_ROR);
+    assign    rol_sel = (operator_i == ZKB_ROL);
+    assign   rori_sel = (operator_i == ZKB_RORI);
+    assign   andn_sel = (operator_i == ZKB_ANDN);
+    assign    orn_sel = (operator_i == ZKB_ORN);
+    assign   xnor_sel = (operator_i == ZKB_XNOR);
+    assign   pack_sel = (operator_i == ZKB_PACK);
+    assign  packh_sel = (operator_i == ZKB_PACKH);
+    assign  brev8_sel = (operator_i == ZKB_BREV8);
+    assign   rev8_sel = (operator_i == ZKB_REV8);
+    assign    zip_sel = (operator_i == ZKB_ZIP);
+    assign  unzip_sel = (operator_i == ZKB_UNZIP);
+    assign clmull_sel = (operator_i == ZKB_CLMUL );
+    assign clmulh_sel = (operator_i == ZKB_CLMULH);
+    assign xperm8_sel = (operator_i == ZKB_XPERM8);
+    assign xperm4_sel = (operator_i == ZKB_XPERM4);
+
+    logic [ 4:0] shamt;
+    assign shamt  = operand_b_i[4:0];
+
+    logic [31:0] wror, wrol, wandn, worn, wxnor, wpack, wpackh;
+    assign wror   = ror32(operand_a_i, shamt);
+    assign wrol   = rol32(operand_a_i, shamt);
+    assign wandn  = operand_a_i & (~operand_b_i);
+    assign worn   = operand_a_i | (~operand_b_i);
+    assign wxnor  = operand_a_i ^ (~operand_b_i);
+    assign wpack  = {       operand_b_i[15:0], operand_a_i[15:0]};
+    assign wpackh = {16'd0, operand_b_i[ 7:0], operand_a_i[ 7:0]};
+
+    logic [ 7:0] rs1_b0, rs1_b1, rs1_b2, rs1_b3;
+    assign rs1_b0  = operand_a_i[ 7: 0];
+    assign rs1_b1  = operand_a_i[15: 8];
+    assign rs1_b2  = operand_a_i[23:16];
+    assign rs1_b3  = operand_a_i[31:24];
+
+    logic [ 7:0] brev8_0, brev8_1, brev8_2, brev8_3;
+    assign brev8_0 = rev8(rs1_b0);
+    assign brev8_1 = rev8(rs1_b1);
+    assign brev8_2 = rev8(rs1_b2);
+    assign brev8_3 = rev8(rs1_b3);
+
+    logic [31:0] wbrev8, wrev8;
+    assign wbrev8  = {brev8_3, brev8_2, brev8_1, brev8_0};
+    assign wrev8   = {rs1_b0,  rs1_b1,  rs1_b2,  rs1_b3};
+
+    logic [31:0] wzip, wunzip;
+    assign wzip   = zip32(  operand_a_i);
+    assign wunzip = unzip32(operand_a_i);
+
+    // Xperm instructions
+    // indexable access 4-bit LUT.
+    logic [ 3:0] lut_4b [7:0];
+    logic [31:0] wxperm4;
+    for(genvar i = 0; i < 8; i = i + 1) begin : gen_lut_xperm4
+      // generate table.
+      assign lut_4b[i] = operand_a_i[4*i+:4];
+
+      logic [2:0] lut_8idx;
+      assign lut_8idx   = operand_b_i[4*i+:3];
+
+      logic [3:0] lut4_out;
+      assign lut4_out = lut_4b[lut_8idx];
+      assign wxperm4[i*4+:4]  = operand_b_i[4*i+3] ? 4'b0000 : lut4_out;
+    end
+
+   // indexable access 8-bit LUT.
+    logic [ 7:0] lut_8b [3:0];
+    logic [31:0] wxperm8;
+    for(genvar i = 0; i < 4; i = i + 1) begin : gen_lut_xperm8
+      // generate table.
+      assign lut_8b[i] = operand_a_i[8*i+:8];
+
+      logic [1:0] lut_4idx;
+      assign lut_4idx   = operand_b_i[8*i+:2];
+
+      logic [7:0] lut8_out;
+      assign lut8_out = lut_8b[lut_4idx];
+      assign wxperm8[i*8+:8]  = |{operand_b_i[8*i+7:8*i+2]} ? 8'd0 : lut8_out;
+    end
+
+    // clmul instructions
+    logic [15:0] lhs0, rhs0, lhs1, rhs1, lhs2, rhs2;
+    assign lhs0 = clmulh_sel? operand_a_i[31:16] : operand_a_i[15: 0];
+    assign rhs0 = clmulh_sel? operand_b_i[31:16] : operand_b_i[15: 0];
+
+    assign lhs1 = operand_a_i[15: 0];
+    assign rhs1 = operand_b_i[31:16];
+
+    assign lhs2 = operand_a_i[31:16];
+    assign rhs2 = operand_b_i[15: 0];
+
+    logic [31:0]  polymul0, polymul1, polymul2;
+    ibex_poly16_mul mul16_ins0(lhs0, rhs0, polymul0);
+    ibex_poly16_mul mul16_ins1(lhs1, rhs1, polymul1);
+    ibex_poly16_mul mul16_ins2(lhs2, rhs2, polymul2);
+
+    logic [31:0] wclmull, wclmulh, clmulm;
+    assign clmulm  = polymul1 ^ polymul2;
+    assign wclmulh = {polymul0[31:16], (polymul0[15: 0] ^ clmulm[31:16])                 };
+    assign wclmull = {                 (polymul0[31:16] ^ clmulm[15: 0]), polymul0[15: 0]};
+
+    assign zkb_val    = |{ror_sel, rol_sel, rori_sel, andn_sel, orn_sel, xnor_sel,
+                          pack_sel, packh_sel, brev8_sel, rev8_sel, zip_sel, unzip_sel,
+                          clmull_sel, clmulh_sel, xperm8_sel, xperm4_sel};
+    assign zkb_result = {32{   ror_sel}} & wror    |
+                        {32{   rol_sel}} & wrol    |
+                        {32{  rori_sel}} & wror    |
+                        {32{  andn_sel}} & wandn   |
+                        {32{   orn_sel}} & worn    |
+                        {32{  xnor_sel}} & wxnor   |
+                        {32{  pack_sel}} & wpack   |
+                        {32{ packh_sel}} & wpackh  |
+                        {32{ brev8_sel}} & wbrev8  |
+                        {32{  rev8_sel}} & wrev8   |
+                        {32{   zip_sel}} & wzip    |
+                        {32{ unzip_sel}} & wunzip  |
+                        {32{clmull_sel}} & wclmull |
+                        {32{clmulh_sel}} & wclmulh |
+                        {32{xperm8_sel}} & wxperm8 |
+                        {32{xperm4_sel}} & wxperm4 ;
+  end else begin : no_gen_zkb
+    assign zkb_val    =  1'b0;
+    assign zkb_result = 32'd0;
+  end
+
   logic        zkn_val;
   logic [31:0] zkn_result;
 
@@ -221,8 +405,8 @@ endfunction
     assign zks_result = 32'd0;
   end
 
-  assign zk_val_o = zkn_val   || zks_val;
-  assign result_o = zkn_result | zks_result;
+  assign zk_val_o = zkb_val   || zkn_val   || zks_val;
+  assign result_o = zkb_result | zkn_result | zks_result;
 
 `undef RORI32
 `undef ROLI32

From e8967f82c1647bca688d5114308adf62668eaa80 Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Tue, 30 Nov 2021 14:57:42 +0000
Subject: [PATCH 11/21] ZK: small fixes for zip/unzip instructions

---
 rtl/ibex_zk.sv | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/rtl/ibex_zk.sv b/rtl/ibex_zk.sv
index 557c4a23e4..dc7ba9773a 100644
--- a/rtl/ibex_zk.sv
+++ b/rtl/ibex_zk.sv
@@ -57,24 +57,25 @@ endfunction
 
 // 32-bit Zip
 function automatic logic [31:0] zip32(logic [31:0] x);
-    logic [15:0] zh, zl;
+    logic [31:0] uz;
     for (int i = 0;  i < 16; i = i + 1) begin
-        assign zh[i] = x[2*i + 1];
-        assign zl[i] = x[2*i    ];
+        uz[2*i  ] = x[i];     // lower half of x supplies the even result bits
+        uz[2*i+1] = x[i+16];  // upper half of x supplies the odd result bits
     end
-    return {zh, zl};
+    return uz;
 endfunction
 
 // 32-bit UnZip
 function automatic logic [31:0] unzip32(logic [31:0] x);
-    logic [31:0] uz;
+    logic [15:0] zh, zl;
     for (int i = 0;  i < 16; i = i + 1) begin
-        assign uz[2*i  ] = x[i];
-        assign uz[2*i+1] = x[i+16];
+        zh[i] = x[2*i + 1];  // odd bits of x gather into the upper half
+        zl[i] = x[2*i    ];  // even bits of x gather into the lower half
     end
-    return uz;
+    return {zh, zl};
 endfunction
 
+
 // Multiply by 2 in GF(2^8) modulo 8'h1b
 function automatic logic [7:0] xtime2(logic [7:0] a);
     logic [7:0] xtime2;

From 5f5eddba52a40d51151df81bd0ad6b1f48f21d6c Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Thu, 2 Dec 2021 11:57:59 +0000
Subject: [PATCH 12/21] add the Zk feature for Ibex configurations.

---
 .../simple_system/ibex_simple_system.core     |  7 +++
 .../simple_system/rtl/ibex_simple_system.sv   |  6 +++
 ibex_configs.yaml                             | 44 +++++++++++++++++++
 ibex_core.core                                |  6 +++
 rtl/ibex_ex_block.sv                          |  3 ++
 rtl/ibex_top.sv                               |  2 +-
 rtl/ibex_top_tracing.sv                       |  2 +
 7 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/examples/simple_system/ibex_simple_system.core b/examples/simple_system/ibex_simple_system.core
index 1813bab074..ba0ceb8039 100644
--- a/examples/simple_system/ibex_simple_system.core
+++ b/examples/simple_system/ibex_simple_system.core
@@ -31,6 +31,12 @@ parameters:
     paramtype: vlogdefine
     description: "Bitmanip implementation parameter enum. See the ibex_pkg::rv32b_e enum in ibex_pkg.sv for permitted values."
 
+  RV32Zk:
+    datatype: str
+    default: ibex_pkg::RV32ZkNone
+    paramtype: vlogdefine
+    description: "Zk cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values."
+
   RegFile:
     datatype: str
     default: ibex_pkg::RegFileFF
@@ -105,6 +111,7 @@ targets:
       - RV32E
       - RV32M
       - RV32B
+      - RV32Zk
       - RegFile
       - ICache
       - ICacheECC
diff --git a/examples/simple_system/rtl/ibex_simple_system.sv b/examples/simple_system/rtl/ibex_simple_system.sv
index f54a8983f0..62a350e274 100644
--- a/examples/simple_system/rtl/ibex_simple_system.sv
+++ b/examples/simple_system/rtl/ibex_simple_system.sv
@@ -14,6 +14,10 @@
   `define RV32B ibex_pkg::RV32BNone
 `endif
 
+`ifndef RV32B
+  `define RV32B ibex_pkg::RV32ZkNone
+`endif
+
 `ifndef RegFile
   `define RegFile ibex_pkg::RegFileFF
 `endif
@@ -42,6 +46,7 @@ module ibex_simple_system (
   parameter bit                 RV32E                    = 1'b0;
   parameter ibex_pkg::rv32m_e   RV32M                    = `RV32M;
   parameter ibex_pkg::rv32b_e   RV32B                    = `RV32B;
+  parameter ibex_pkg::rv32zk_e  RV32Zk                   = `RV32Zk;
   parameter ibex_pkg::regfile_e RegFile                  = `RegFile;
   parameter bit                 BranchTargetALU          = 1'b0;
   parameter bit                 WritebackStage           = 1'b0;
@@ -170,6 +175,7 @@ module ibex_simple_system (
       .RV32E           ( RV32E           ),
       .RV32M           ( RV32M           ),
       .RV32B           ( RV32B           ),
+      .RV32Zk          ( RV32Zk          ),
       .RegFile         ( RegFile         ),
       .BranchTargetALU ( BranchTargetALU ),
       .ICache          ( ICache          ),
diff --git a/ibex_configs.yaml b/ibex_configs.yaml
index 1765c3f42e..9b377dda80 100644
--- a/ibex_configs.yaml
+++ b/ibex_configs.yaml
@@ -11,6 +11,7 @@ small:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MFast"
   RV32B                    : "ibex_pkg::RV32BNone"
+  RV32Zk                   : "ibex_pkg::RV32ZkNone"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 0
   WritebackStage           : 0
@@ -27,6 +28,7 @@ opentitan:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
   RV32B                    : "ibex_pkg::RV32BNone"
+  RV32Zk                   : "ibex_pkg::RV32ZkNone"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
   WritebackStage           : 1
@@ -49,6 +51,7 @@ experimental-maxperf:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
   RV32B                    : "ibex_pkg::RV32BNone"
+  RV32Zk                   : "ibex_pkg::RV32ZkNone"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
   WritebackStage           : 1
@@ -65,6 +68,7 @@ experimental-maxperf-pmp:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
   RV32B                    : "ibex_pkg::RV32BNone"
+  RV32Zk                   : "ibex_pkg::RV32ZkNone"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
   WritebackStage           : 1
@@ -81,6 +85,7 @@ experimental-maxperf-pmp-bmbalanced:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
   RV32B                    : "ibex_pkg::RV32BBalanced"
+  RV32Zk                   : "ibex_pkg::RV32ZkNone"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
   WritebackStage           : 1
@@ -97,6 +102,7 @@ experimental-maxperf-pmp-bmfull:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
   RV32B                    : "ibex_pkg::RV32BFull"
+  RV32Zk                   : "ibex_pkg::RV32ZkNone"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
   WritebackStage           : 1
@@ -108,11 +114,47 @@ experimental-maxperf-pmp-bmfull:
   PMPNumRegions            : 16
   SecureIbex               : 0
 
+# experimental-maxperf-pmp config above with zkn extension
+experimental-maxperf-pmp-zkn:
+  RV32E                    : 0
+  RV32M                    : "ibex_pkg::RV32MSingleCycle"
+  RV32B                    : "ibex_pkg::RV32BFull"
+  RV32Zk                   : "ibex_pkg::RV32Zkn"
+  RegFile                  : "ibex_pkg::RegFileFF"
+  BranchTargetALU          : 1
+  WritebackStage           : 1
+  ICache                   : 0
+  ICacheECC                : 0
+  BranchPredictor          : 0
+  PMPEnable                : 1
+  PMPGranularity           : 0
+  PMPNumRegions            : 16
+  SecureIbex               : 0
+
+# experimental-maxperf-pmp config above with zks extension
+experimental-maxperf-pmp-zks:
+  RV32E                    : 0
+  RV32M                    : "ibex_pkg::RV32MSingleCycle"
+  RV32B                    : "ibex_pkg::RV32BFull"
+  RV32Zk                   : "ibex_pkg::RV32Zks"
+  RegFile                  : "ibex_pkg::RegFileFF"
+  BranchTargetALU          : 1
+  WritebackStage           : 1
+  ICache                   : 0
+  ICacheECC                : 0
+  BranchPredictor          : 0
+  PMPEnable                : 1
+  PMPGranularity           : 0
+  PMPNumRegions            : 16
+  SecureIbex               : 0
+
+
 # experimental-maxperf-pmp-bmfull config above with icache enabled
 experimental-maxperf-pmp-bmfull-icache:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
   RV32B                    : "ibex_pkg::RV32BFull"
+  RV32Zk                   : "ibex_pkg::RV32ZkNone"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
   WritebackStage           : 1
@@ -132,6 +174,7 @@ experimental-branch-predictor:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
   RV32B                    : "ibex_pkg::RV32BNone"
+  RV32Zk                   : "ibex_pkg::RV32ZkNone"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
   WritebackStage           : 1
@@ -143,3 +186,4 @@ experimental-branch-predictor:
   PMPNumRegions            : 4
   SecureIbex               : 0
 
+
diff --git a/ibex_core.core b/ibex_core.core
index 82dab3a98c..b43baae164 100644
--- a/ibex_core.core
+++ b/ibex_core.core
@@ -85,6 +85,12 @@ parameters:
     paramtype: vlogdefine
     description: "Bitmanip implementation parameter enum. See the ibex_pkg::rv32b_e enum in ibex_pkg.sv for permitted values."
 
+  RV32Zk:
+    datatype: str
+    default: ibex_pkg::RV32ZkNone
+    paramtype: vlogdefine
+    description: "Zk cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values."
+
   RegFile:
     datatype: str
     default: ibex_pkg::RegFileFF
diff --git a/rtl/ibex_ex_block.sv b/rtl/ibex_ex_block.sv
index 127cf3d5b2..672ee012d7 100644
--- a/rtl/ibex_ex_block.sv
+++ b/rtl/ibex_ex_block.sv
@@ -148,6 +148,9 @@ module ibex_ex_block #(
     .result_o           (zke_result),
     .zk_val_o           (zke_val)
     );
+  end else begin : no_gen_Zkn
+  assign zke_result = 32'd0;
+  assign zke_val    =  1'b0;
   end
 
   ////////////////
diff --git a/rtl/ibex_top.sv b/rtl/ibex_top.sv
index d58086d4cf..1f7a01ed6e 100644
--- a/rtl/ibex_top.sv
+++ b/rtl/ibex_top.sv
@@ -21,7 +21,7 @@ module ibex_top import ibex_pkg::*; #(
   parameter bit          RV32E            = 1'b0,
   parameter rv32m_e      RV32M            = RV32MFast,
   parameter rv32b_e      RV32B            = RV32BNone,
-  parameter rv32zk_e     RV32Zk           = RV32Zkn,
+  parameter rv32zk_e     RV32Zk           = RV32ZkNone,
   parameter regfile_e    RegFile          = RegFileFF,
   parameter bit          BranchTargetALU  = 1'b0,
   parameter bit          WritebackStage   = 1'b0,
diff --git a/rtl/ibex_top_tracing.sv b/rtl/ibex_top_tracing.sv
index c08fc18443..c34cf1b99b 100644
--- a/rtl/ibex_top_tracing.sv
+++ b/rtl/ibex_top_tracing.sv
@@ -15,6 +15,7 @@ module ibex_top_tracing import ibex_pkg::*; #(
   parameter bit          RV32E            = 1'b0,
   parameter rv32m_e      RV32M            = RV32MFast,
   parameter rv32b_e      RV32B            = RV32BNone,
+  parameter rv32zk_e     RV32Zk           = RV32ZkNone,
   parameter regfile_e    RegFile          = RegFileFF,
   parameter bit          BranchTargetALU  = 1'b0,
   parameter bit          WritebackStage   = 1'b0,
@@ -136,6 +137,7 @@ module ibex_top_tracing import ibex_pkg::*; #(
     .RV32E            ( RV32E            ),
     .RV32M            ( RV32M            ),
     .RV32B            ( RV32B            ),
+    .RV32Zk           ( RV32Zk            ),
     .RegFile          ( RegFile          ),
     .BranchTargetALU  ( BranchTargetALU  ),
     .ICache           ( ICache           ),

From 1ef9340584e51aab94df6edc9b554e7c60b4ea57 Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Thu, 2 Dec 2021 13:28:00 +0000
Subject: [PATCH 13/21] ZK: add --RV32Zk argument for Fusesoc.

---
 dv/riscv_compliance/ibex_riscv_compliance.core            | 7 +++++++
 dv/riscv_compliance/rtl/ibex_riscv_compliance.sv          | 8 ++++++++
 .../simple_system_cosim/ibex_simple_system_cosim.core     | 7 +++++++
 examples/simple_system/rtl/ibex_simple_system.sv          | 4 ++--
 ibex_top_tracing.core                                     | 7 +++++++
 5 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/dv/riscv_compliance/ibex_riscv_compliance.core b/dv/riscv_compliance/ibex_riscv_compliance.core
index edc7fcbffa..74be84b0a8 100644
--- a/dv/riscv_compliance/ibex_riscv_compliance.core
+++ b/dv/riscv_compliance/ibex_riscv_compliance.core
@@ -42,6 +42,12 @@ parameters:
     paramtype: vlogdefine
     description: "Bitmanip implementation parameter enum. See the ibex_pkg::rv32b_e enum in ibex_pkg.sv for permitted values."
 
+  RV32Zk:
+    datatype: str
+    default: ibex_pkg::RV32ZkNone
+    paramtype: vlogdefine
+    description: "Zk cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values."
+
   RegFile:
     datatype: str
     default: ibex_pkg::RegFileFF
@@ -112,6 +118,7 @@ targets:
       - RV32E
       - RV32M
       - RV32B
+      - RV32Zk
       - RegFile
       - ICache
       - ICacheECC
diff --git a/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv b/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv
index 5441a13561..b85098cc94 100644
--- a/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv
+++ b/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv
@@ -10,6 +10,11 @@
  * simulators (if the top-level clk and rst ports are replaced with a generated
  * clock).
  */
+
+`ifndef RV32Zk
+  `define RV32Zk ibex_pkg::RV32ZkNone
+`endif
+
 module ibex_riscv_compliance (
   input IO_CLK,
   input IO_RST_N
@@ -21,6 +26,8 @@ module ibex_riscv_compliance (
   parameter bit RV32E                   = 1'b0;
   parameter ibex_pkg::rv32m_e RV32M     = ibex_pkg::RV32MFast;
   parameter ibex_pkg::rv32b_e RV32B     = ibex_pkg::RV32BNone;
+  parameter ibex_pkg::rv32zk_e RV32Zk   = `RV32Zk;
+
   parameter ibex_pkg::regfile_e RegFile = ibex_pkg::RegFileFF;
   parameter bit BranchTargetALU         = 1'b0;
   parameter bit WritebackStage          = 1'b0;
@@ -120,6 +127,7 @@ module ibex_riscv_compliance (
       .RV32E           (RV32E           ),
       .RV32M           (RV32M           ),
       .RV32B           (RV32B           ),
+      .RV32Zk          ( RV32Zk         ),
       .RegFile         (RegFile         ),
       .BranchTargetALU (BranchTargetALU ),
       .WritebackStage  (WritebackStage  ),
diff --git a/dv/verilator/simple_system_cosim/ibex_simple_system_cosim.core b/dv/verilator/simple_system_cosim/ibex_simple_system_cosim.core
index bc9acfcaa2..8429602407 100644
--- a/dv/verilator/simple_system_cosim/ibex_simple_system_cosim.core
+++ b/dv/verilator/simple_system_cosim/ibex_simple_system_cosim.core
@@ -35,6 +35,12 @@ parameters:
     paramtype: vlogdefine
     description: "Bitmanip implementation parameter enum. See the ibex_pkg::rv32b_e enum in ibex_pkg.sv for permitted values."
 
+  RV32Zk:
+    datatype: str
+    default: ibex_pkg::RV32ZkNone
+    paramtype: vlogdefine
+    description: "Zk cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values."
+
   RegFile:
     datatype: str
     default: ibex_pkg::RegFileFF
@@ -109,6 +115,7 @@ targets:
       - RV32E
       - RV32M
       - RV32B
+      - RV32Zk
       - RegFile
       - ICache
       - ICacheECC
diff --git a/examples/simple_system/rtl/ibex_simple_system.sv b/examples/simple_system/rtl/ibex_simple_system.sv
index 62a350e274..ae9540ad03 100644
--- a/examples/simple_system/rtl/ibex_simple_system.sv
+++ b/examples/simple_system/rtl/ibex_simple_system.sv
@@ -14,8 +14,8 @@
   `define RV32B ibex_pkg::RV32BNone
 `endif
 
-`ifndef RV32B
-  `define RV32B ibex_pkg::RV32ZkNone
+`ifndef RV32Zk
+  `define RV32Zk ibex_pkg::RV32ZkNone
 `endif
 
 `ifndef RegFile
diff --git a/ibex_top_tracing.core b/ibex_top_tracing.core
index 550e5a18b6..1cff3fce4b 100644
--- a/ibex_top_tracing.core
+++ b/ibex_top_tracing.core
@@ -41,6 +41,12 @@ parameters:
     paramtype: vlogdefine
     description: "Bitmanip implementation parameter enum. See the ibex_pkg::rv32b_e enum in ibex_pkg.sv for permitted values."
 
+  RV32Zk:
+    datatype: str
+    default: ibex_pkg::RV32ZkNone
+    paramtype: vlogdefine
+    description: "Zk cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values."
+
   RegFile:
     datatype: str
     default: ibex_pkg::RegFileFF
@@ -117,6 +123,7 @@ targets:
       - RV32E
       - RV32M
       - RV32B
+      - RV32Zk
       - RegFile
       - ICache
       - ICacheECC

From 6393a5dc13e6f60e1e18e8293ffc8cf0289e76b9 Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Thu, 2 Dec 2021 16:08:07 +0000
Subject: [PATCH 14/21] ZK: edit rtl code style for veriblelint check.

---
 ibex_configs.yaml      |   4 +-
 rtl/ibex_decoder.sv    | 229 +++++++++++++++++++++++------------------
 rtl/ibex_ex_block.sv   |   2 +-
 rtl/ibex_poly16_mul.sv |  51 ++++++---
 rtl/ibex_zk.sv         |  28 ++---
 5 files changed, 184 insertions(+), 130 deletions(-)

diff --git a/ibex_configs.yaml b/ibex_configs.yaml
index 9b377dda80..16407922d5 100644
--- a/ibex_configs.yaml
+++ b/ibex_configs.yaml
@@ -118,7 +118,7 @@ experimental-maxperf-pmp-bmfull:
 experimental-maxperf-pmp-zkn:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
-  RV32B                    : "ibex_pkg::RV32BFull"
+  RV32B                    : "ibex_pkg::RV32BNone"
   RV32Zk                   : "ibex_pkg::RV32Zkn"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
@@ -135,7 +135,7 @@ experimental-maxperf-pmp-zkn:
 experimental-maxperf-pmp-zks:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
-  RV32B                    : "ibex_pkg::RV32BFull"
+  RV32B                    : "ibex_pkg::RV32BNone"
   RV32Zk                   : "ibex_pkg::RV32Zks"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
diff --git a/rtl/ibex_decoder.sv b/rtl/ibex_decoder.sv
index 4e6a157b46..07994e64ee 100644
--- a/rtl/ibex_decoder.sv
+++ b/rtl/ibex_decoder.sv
@@ -368,9 +368,13 @@ module ibex_decoder #(
               5'b0_0101,                                                              // sbseti
               5'b0_1101: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1;           // sbinvi
               5'b0_0001: if (instr[26] == 1'b0) begin
-                if      (RV32B  == RV32BFull ) illegal_insn = 1'b0;                   // shfl
-                else if (RV32Zk != RV32ZkNone) illegal_insn = (instr[25:20] == 6'b00_1111) ? 1'b0 : 1'b1; //zip
-                else illegal_insn = 1'b1;
+                if (RV32B == RV32BFull ) begin
+                  illegal_insn = 1'b0;                                                // shfl
+                end else if (RV32Zk != RV32ZkNone) begin
+                  illegal_insn = (instr[25:20] == 6'b00_1111) ? 1'b0 : 1'b1;          //zip
+                end else begin
+                  illegal_insn = 1'b1;
+                end
               end else begin
                 illegal_insn = 1'b1;
               end
@@ -443,9 +447,13 @@ module ibex_decoder #(
                 end
                 5'b0_0001: begin
                   if (instr[26] == 1'b0) begin
-                    if      (RV32B  == RV32BFull ) illegal_insn = 1'b0;               // unshfl
-                    else if (RV32Zk != RV32ZkNone) illegal_insn = (instr[25:20] == 6'b00_1111) ? 1'b0 : 1'b1; //unzip
-                    else illegal_insn = 1'b1;
+                    if (RV32B  == RV32BFull ) begin
+                      illegal_insn = 1'b0;                                             // unshfl
+                    end else if (RV32Zk != RV32ZkNone) begin
+                      illegal_insn = (instr[25:20] == 6'b00_1111) ? 1'b0 : 1'b1;       //unzip
+                    end else begin
+                      illegal_insn = 1'b1;
+                    end
                   end else begin
                     illegal_insn = 1'b1;
                   end
@@ -467,7 +475,7 @@ module ibex_decoder #(
         if ({instr[26], instr[13:12]} == {1'b1, 2'b01}) begin
           illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // cmix / cmov / fsl / fsr
         end else if ({instr[29:28],instr[25], instr[14:12]} == {3'b10__1, 3'b000}) begin
-          illegal_insn = (RV32Zk == RV32Zkn) ? 1'b0 : 1'b1; // aes32ds / aes32dsm / aes32es / aes32esm
+          illegal_insn = (RV32Zk == RV32Zkn) ? 1'b0 : 1'b1; // aes32ds/es aes32dsm/esm
         end else if ({instr[29:27],instr[25], instr[14:12]} == {4'b110_0, 3'b000}) begin
           illegal_insn = (RV32Zk == RV32Zks) ? 1'b0 : 1'b1; // sm4ed / sm4ks
         end else begin
@@ -857,64 +865,76 @@ module ibex_decoder #(
           3'b111: alu_operator_o = ALU_AND;  // And with Immediate
 
           3'b001: begin
-            unique case (instr_alu[31:27])
-              5'b0_0000: alu_operator_o = ALU_SLL;    // Shift Left Logical by Immediate
-              5'b0_0100: if (RV32B != RV32BNone) alu_operator_o = ALU_SLO  ;  // Shift Left Ones by Immediate
-              5'b0_1001: if (RV32B != RV32BNone) alu_operator_o = ALU_SBCLR;  // Clear bit specified by immediate
-              5'b0_0101: if (RV32B != RV32BNone) alu_operator_o = ALU_SBSET;  // Set bit specified by immediate
-              5'b0_1101: if (RV32B != RV32BNone) alu_operator_o = ALU_SBINV;  // Invert bit specified by immediate.
-              5'b0_0001: if (instr_alu[26] == 0) begin
-                 if (RV32B == RV32BFull)        alu_operator_o = ALU_SHFL;    // Shuffle with Immediate Control Value
-                 else if (RV32Zk != RV32ZkNone) alu_operator_o = (instr[25:20] == 6'b00_1111) ? ZKB_ZIP : ALU_SLL; //zbk_zip
-                 else                           alu_operator_o = ALU_SLL;
-              end
-              5'b0_1100: begin
-                unique case (instr_alu[26:20])
-                  7'b000_0000: if (RV32B != RV32BNone) alu_operator_o = ALU_CLZ  ; // clz
-                  7'b000_0001: if (RV32B != RV32BNone) alu_operator_o = ALU_CTZ  ; // ctz
-                  7'b000_0010: if (RV32B != RV32BNone) alu_operator_o = ALU_PCNT ; // pcnt
-                  7'b000_0100: if (RV32B != RV32BNone) alu_operator_o = ALU_SEXTB; // sext.b
-                  7'b000_0101: if (RV32B != RV32BNone) alu_operator_o = ALU_SEXTH; // sext.h
-                  7'b001_0000: if (RV32B == RV32BFull) begin
-                    alu_operator_o   = ALU_CRC32_B;  // crc32.b
-                    alu_multicycle_o = 1'b1;
-                  end
-                  7'b001_0001: if (RV32B == RV32BFull) begin
-                    alu_operator_o   = ALU_CRC32_H;  // crc32.h
-                    alu_multicycle_o = 1'b1;
-                  end
-                  7'b001_0010: if (RV32B == RV32BFull) begin
-                    alu_operator_o   = ALU_CRC32_W;  // crc32.w
-                    alu_multicycle_o = 1'b1;
-                  end
-                  7'b001_1000: if (RV32B == RV32BFull) begin
-                    alu_operator_o   = ALU_CRC32C_B; // crc32c.b
-                    alu_multicycle_o = 1'b1;
-                  end
-                  7'b001_1001: if (RV32B == RV32BFull) begin
-                    alu_operator_o   = ALU_CRC32C_H; // crc32c.h
-                    alu_multicycle_o = 1'b1;
-                  end
-                  7'b001_1010: if (RV32B == RV32BFull) begin
-                    alu_operator_o   = ALU_CRC32C_W; // crc32c.w
-                    alu_multicycle_o = 1'b1;
-                  end
-                  default: ;
-                endcase
-              end
-              5'b0_0010: begin                                                         // Zk, zkh,zks
-                unique case (instr_alu[26:20])
-                  7'b000_0000: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SUM0; // sha256sum0
-                  7'b000_0001: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SUM1; // sha256sum1
-                  7'b000_0010: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SIG0; // sha256sig0
-                  7'b000_0011: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SIG1; // sha256sig1
-                  7'b000_1000: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM3P0;      // sm3p0
-                  7'b000_1001: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM3P1;      // sm3p1
-                  default:     alu_operator_o = ALU_SLL;
-                endcase
-              end
+            if (RV32B != RV32BNone) begin
+              unique case (instr_alu[31:27])
+                5'b0_0000: alu_operator_o = ALU_SLL;    // Shift Left Logical by Immediate
+                5'b0_0100: alu_operator_o = ALU_SLO  ;  // Shift Left Ones by Immediate
+                5'b0_1001: alu_operator_o = ALU_SBCLR;  // Clear bit specified by immediate
+                5'b0_0101: alu_operator_o = ALU_SBSET;  // Set bit specified by immediate
+                5'b0_1101: alu_operator_o = ALU_SBINV;  // Invert bit specified by immediate.
+
+                // Shuffle with Immediate Control Value
+                5'b0_0001: if (instr_alu[26] == 0) alu_operator_o = ALU_SHFL;
+                5'b0_1100: begin
+                  unique case (instr_alu[26:20])
+                    7'b000_0000: alu_operator_o = ALU_CLZ  ; // clz
+                    7'b000_0001: alu_operator_o = ALU_CTZ  ; // ctz
+                    7'b000_0010: alu_operator_o = ALU_PCNT ; // pcnt
+                    7'b000_0100: alu_operator_o = ALU_SEXTB; // sext.b
+                    7'b000_0101: alu_operator_o = ALU_SEXTH; // sext.h
+                    7'b001_0000: if (RV32B == RV32BFull) begin
+                      alu_operator_o   = ALU_CRC32_B;  // crc32.b
+                      alu_multicycle_o = 1'b1;
+                    end
+                    7'b001_0001: if (RV32B == RV32BFull) begin
+                      alu_operator_o   = ALU_CRC32_H;  // crc32.h
+                      alu_multicycle_o = 1'b1;
+                    end
+                    7'b001_0010: if (RV32B == RV32BFull) begin
+                      alu_operator_o   = ALU_CRC32_W;  // crc32.w
+                      alu_multicycle_o = 1'b1;
+                    end
+                    7'b001_1000: if (RV32B == RV32BFull) begin
+                      alu_operator_o   = ALU_CRC32C_B; // crc32c.b
+                      alu_multicycle_o = 1'b1;
+                    end
+                    7'b001_1001: if (RV32B == RV32BFull) begin
+                      alu_operator_o   = ALU_CRC32C_H; // crc32c.h
+                      alu_multicycle_o = 1'b1;
+                    end
+                    7'b001_1010: if (RV32B == RV32BFull) begin
+                      alu_operator_o   = ALU_CRC32C_W; // crc32c.w
+                      alu_multicycle_o = 1'b1;
+                    end
+                    default: ;
+                  endcase
+                end
+
+                default: alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate
+              endcase
+            end else begin  // no RV32B: plain SLLI falls to default below
+              unique case (instr_alu[31:27])
+                5'b0_0001: if (instr[26:20] == 7'b000_1111) alu_operator_o = ZKB_ZIP;//zbk_zip
+                5'b0_0010: begin  // zkn, zks
+                  unique case (instr_alu[26:20])
+                    // sha256sum0
+                    7'b000_0000: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SUM0;
+                    // sha256sum1
+                    7'b000_0001: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SUM1;
+                    // sha256sig0
+                    7'b000_0010: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SIG0;
+                    // sha256sig1
+                    7'b000_0011: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA256SIG1;
+                    // sm3p0
+                    7'b000_1000: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM3P0;
+                    // sm3p1
+                    7'b000_1001: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM3P1;
+                    default:     alu_operator_o = ALU_SLL;
+                  endcase
+                end
               default: alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate
-            endcase
+              endcase
+            end
           end
 
           3'b101: begin
@@ -966,7 +986,6 @@ module ibex_decoder #(
               endcase
             end
           end
-
           default: ;
         endcase
       end
@@ -1037,7 +1056,7 @@ module ibex_decoder #(
             {7'b011_0000, 3'b001}: begin
               if (RV32B != RV32BNone) begin
                 alu_operator_o = ALU_ROL;   // rol
-                alu_multicycle_o = 1'b1;                
+                alu_multicycle_o = 1'b1;
               end else if ((RV32Zk != RV32ZkNone)) begin
                 alu_operator_o = ZKB_ROL;   // zbk_rol
               end
@@ -1124,44 +1143,58 @@ module ibex_decoder #(
             end
 
             // RV32Zk zbk
-            {7'b001_0100, 3'b100}: if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_XPERM8; // xperm8
-            {7'b001_0100, 3'b010}: if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_XPERM4; // xperm4
+            // xperm8
+            {7'b001_0100, 3'b100}: if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_XPERM8;
+            // xperm4
+            {7'b001_0100, 3'b010}: if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_XPERM4;
 
             // RV32Zk zkh
-            {7'b010_1000, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SUM0R; // sha512_sum0r
-            {7'b010_1001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SUM1R; // sha512_sum1r
-            {7'b010_1010, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG0L; // sha512_sig0l
-            {7'b010_1011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG1L; // sha512_sig1l
-            {7'b010_1110, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG0H; // sha512_sig0h
-            {7'b010_1111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG1H; // sha512_sig1h
+            // sha512_sum0r
+            {7'b010_1000, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SUM0R;
+            // sha512_sum1r
+            {7'b010_1001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SUM1R;
+            // sha512_sig0l
+            {7'b010_1010, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG0L;
+            // sha512_sig1l
+            {7'b010_1011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG1L;
+            // sha512_sig0h
+            {7'b010_1110, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG0H;
+            // sha512_sig1h
+            {7'b010_1111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SIG1H;
 
             // RV32Zk zkned
-            {7'b001_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB0;  // aes32esb0
-            {7'b011_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB1;  // aes32esb1
-            {7'b101_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB2;  // aes32esb2
-            {7'b111_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB3;  // aes32esb3
-            {7'b001_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB0; // aes32esmb0
-            {7'b011_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB1; // aes32esmb1
-            {7'b101_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB2; // aes32esmb2
-            {7'b111_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB3; // aes32esmb3
-            {7'b001_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB0;  // aes32dsb0
-            {7'b011_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB1;  // aes32dsb1
-            {7'b101_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB2;  // aes32dsb2
-            {7'b111_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB3;  // aes32dsb3
-            {7'b001_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB0; // aes32dsmb0
-            {7'b011_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB1; // aes32dsmb1
-            {7'b101_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB2; // aes32dsmb2
-            {7'b111_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB3; // aes32dsmb3
+            // aes32es
+            {7'b001_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB0;
+            {7'b011_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB1;
+            {7'b101_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB2;
+            {7'b111_0001, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESB3;
+            // aes32esm
+            {7'b001_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB0;
+            {7'b011_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB1;
+            {7'b101_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB2;
+            {7'b111_0011, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32ESMB3;
+            // aes32ds
+            {7'b001_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB0;
+            {7'b011_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB1;
+            {7'b101_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB2;
+            {7'b111_0101, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSB3;
+            // aes32dsm
+            {7'b001_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB0;
+            {7'b011_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB1;
+            {7'b101_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB2;
+            {7'b111_0111, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_AES32DSMB3;
 
             // RV32Zk zks
-            {7'b001_1000, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4EDB0; // sm4edb0
-            {7'b011_1000, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4EDB1; // sm4edb1
-            {7'b101_1000, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4EDB2; // sm4edb2
-            {7'b111_1000, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4EDB3; // sm4edb3
-            {7'b001_1010, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4KSB0; // sm4ksb0
-            {7'b011_1010, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4KSB1; // sm4ksb1
-            {7'b101_1010, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4KSB2; // sm4ksb2
-            {7'b111_1010, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4KSB3; // sm4ksb3
+            // sm4ed
+            {7'b001_1000, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4EDB0;
+            {7'b011_1000, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4EDB1;
+            {7'b101_1000, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4EDB2;
+            {7'b111_1000, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4EDB3;
+            // sm4ks
+            {7'b001_1010, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4KSB0;
+            {7'b011_1010, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4KSB1;
+            {7'b101_1010, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4KSB2;
+            {7'b111_1010, 3'b000}: if (RV32Zk == RV32Zks) alu_operator_o = ZKS_SM4KSB3;
 
             // RV32M instructions, all use the same ALU operation
             {7'b000_0001, 3'b000}: begin // mul
diff --git a/rtl/ibex_ex_block.sv b/rtl/ibex_ex_block.sv
index 672ee012d7..a7ad75532a 100644
--- a/rtl/ibex_ex_block.sv
+++ b/rtl/ibex_ex_block.sv
@@ -148,7 +148,7 @@ module ibex_ex_block #(
     .result_o           (zke_result),
     .zk_val_o           (zke_val)
     );
-  end else begin : no_gen_Zkn
+  end else begin : gen_no_Zkn
   assign zke_result = 32'd0;
   assign zke_val    =  1'b0;
   end
diff --git a/rtl/ibex_poly16_mul.sv b/rtl/ibex_poly16_mul.sv
index b8b8b74e9d..6a3329dae4 100644
--- a/rtl/ibex_poly16_mul.sv
+++ b/rtl/ibex_poly16_mul.sv
@@ -15,25 +15,41 @@ input  logic [15:0] b,
 output logic [31:0] r
 );
 
-  logic       t1,   t2,   t3,   t4,   t5,   t6,   t7,   t8,   t9,   t10,  t11,  t12,  t13,  t14,  t15,  t16,  t17,  t18,  t19;
-  logic t20,  t21,  t22,  t23,  t24,  t25,  t26,  t27,  t28,  t29,  t30,  t31,  t32,  t33,  t34,  t35,  t36,  t37,  t38,  t39;
-  logic t40,  t41,  t42,  t43,  t44,  t45,  t46,  t47,  t48,  t49,  t50,  t51,  t52,  t53,  t54,  t55,  t56,  t57,  t58,  t59;
-  logic t60,  t61,  t62,  t63,  t64,  t65,  t66,  t67,  t68,  t69,  t70,  t71,  t72,  t73,  t74,  t75,  t76,  t77,  t78,  t79;
-  logic t80,  t81,  t82,  t83,  t84,  t85,  t86,  t87,  t88,  t89,  t90,  t91,  t92,  t93,  t94,  t95,  t96,  t97,  t98,  t99;
+  logic       t1,   t2,   t3,   t4,   t5,   t6,   t7,   t8,   t9 ;
+  logic t10,  t11,  t12,  t13,  t14,  t15,  t16,  t17,  t18,  t19;
+  logic t20,  t21,  t22,  t23,  t24,  t25,  t26,  t27,  t28,  t29;
+  logic t30,  t31,  t32,  t33,  t34,  t35,  t36,  t37,  t38,  t39;
+  logic t40,  t41,  t42,  t43,  t44,  t45,  t46,  t47,  t48,  t49;
+  logic t50,  t51,  t52,  t53,  t54,  t55,  t56,  t57,  t58,  t59;
+  logic t60,  t61,  t62,  t63,  t64,  t65,  t66,  t67,  t68,  t69;
+  logic t70,  t71,  t72,  t73,  t74,  t75,  t76,  t77,  t78,  t79;
+  logic t80,  t81,  t82,  t83,  t84,  t85,  t86,  t87,  t88,  t89;
+  logic t90,  t91,  t92,  t93,  t94,  t95,  t96,  t97,  t98,  t99;
 
-  logic t100, t101, t102, t103, t104, t105, t106, t107, t108, t109, t110, t111, t112, t113, t114, t115, t116, t117, t118, t119;
-  logic t120, t121, t122, t123, t124, t125, t126, t127, t128, t129, t130, t131, t132, t133, t134, t135, t136, t137, t138, t139;
-  logic t140, t141, t142, t143, t144, t145, t146, t147, t148, t149, t150, t151, t152, t153, t154, t155, t156, t157, t158, t159;
-  logic t160, t161, t162, t163, t164, t165, t166, t167, t168, t169, t170, t171, t172, t173, t174, t175, t176, t177, t178, t179;
-  logic t180, t181, t182, t183, t184, t185, t186, t187, t188, t189, t190, t191, t192, t193, t194, t195, t196, t197, t198, t199;
+  logic t100, t101, t102, t103, t104, t105, t106, t107, t108, t109;
+  logic t110, t111, t112, t113, t114, t115, t116, t117, t118, t119;
+  logic t120, t121, t122, t123, t124, t125, t126, t127, t128, t129;
+  logic t130, t131, t132, t133, t134, t135, t136, t137, t138, t139;
+  logic t140, t141, t142, t143, t144, t145, t146, t147, t148, t149;
+  logic t150, t151, t152, t153, t154, t155, t156, t157, t158, t159;
+  logic t160, t161, t162, t163, t164, t165, t166, t167, t168, t169;
+  logic t170, t171, t172, t173, t174, t175, t176, t177, t178, t179;
+  logic t180, t181, t182, t183, t184, t185, t186, t187, t188, t189;
+  logic t190, t191, t192, t193, t194, t195, t196, t197, t198, t199;
 
-  logic t200, t201, t202, t203, t204, t205, t206, t207, t208, t209, t210, t211, t212, t213, t214, t215, t216, t217, t218, t219;
-  logic t220, t221, t222, t223, t224, t225, t226, t227, t228, t229, t230, t231, t232, t233, t234, t235, t236, t237, t238, t239;
-  logic t240, t241, t242, t243, t244, t245, t246, t247, t248, t249, t250, t251, t252, t253, t254, t255, t256, t257, t258, t259;
-  logic t260, t261, t262, t263, t264, t265, t266, t267, t268, t269, t270, t271, t272, t273, t274, t275, t276, t277, t278, t279;
-  logic t280, t281, t282, t283, t284, t285, t286, t287, t288, t289, t290, t291, t292, t293, t294, t295, t296, t297, t298, t299;
+  logic t200, t201, t202, t203, t204, t205, t206, t207, t208, t209;
+  logic t210, t211, t212, t213, t214, t215, t216, t217, t218, t219;
+  logic t220, t221, t222, t223, t224, t225, t226, t227, t228, t229;
+  logic t230, t231, t232, t233, t234, t235, t236, t237, t238, t239;
+  logic t240, t241, t242, t243, t244, t245, t246, t247, t248, t249;
+  logic t250, t251, t252, t253, t254, t255, t256, t257, t258, t259;
+  logic t260, t261, t262, t263, t264, t265, t266, t267, t268, t269;
+  logic t270, t271, t272, t273, t274, t275, t276, t277, t278, t279;
+  logic t280, t281, t282, t283, t284, t285, t286, t287, t288, t289;
+  logic t290, t291, t292, t293, t294, t295, t296, t297, t298, t299;
 
-  logic t300, t301, t302, t303, t304, t305, t306, t307, t308, t309, t310, t311, t312, t313, t314, t315, t316, t317, t318;
+  logic t300, t301, t302, t303, t304, t305, t306, t307, t308, t309;
+  logic t310, t311, t312, t313, t314, t315, t316, t317, t318;
 
   logic z0,  z1,  z2,  z3,  z4,  z5,  z6,  z7,  z8,  z9;
   logic z10, z11, z12, z13, z14, z15, z16, z17, z18, z19;
@@ -390,5 +406,6 @@ output logic [31:0] r
   assign t318 = t316 ^ t307;
   assign  z15 = t318 ^ t317;
 
-  assign r = {1'b0,z30,z29,z28,z27,z26,z25,z24,z23,z22,z21,z20,z19,z18,z17,z16,z15,z14,z13,z12,z11,z10,z9,z8,z7,z6,z5,z4,z3,z2,z1,z0};
+  assign r = {1'b0,z30,z29,z28,z27,z26,z25,z24,z23,z22,z21,z20,z19,z18,z17,z16,
+               z15,z14,z13,z12,z11,z10, z9, z8, z7, z6, z5, z4, z3, z2, z1,z0};
 endmodule
diff --git a/rtl/ibex_zk.sv b/rtl/ibex_zk.sv
index dc7ba9773a..e2919d57c1 100644
--- a/rtl/ibex_zk.sv
+++ b/rtl/ibex_zk.sv
@@ -150,7 +150,7 @@ endfunction
 
     // Xperm instructions
     // indexable access 4-bit LUT.
-    logic [ 3:0] lut_4b [7:0];
+    logic [ 3:0] lut_4b [8];
     logic [31:0] wxperm4;
     for(genvar i = 0; i < 8; i = i + 1) begin : gen_lut_xperm4
       // generate table.
@@ -165,7 +165,7 @@ endfunction
     end
 
    // indexable access 8-bit LUT.
-    logic [ 7:0] lut_8b [3:0];
+    logic [ 7:0] lut_8b [4];
     logic [31:0] wxperm8;
     for(genvar i = 0; i < 4; i = i + 1) begin : gen_lut_xperm8
       // generate table.
@@ -191,9 +191,9 @@ endfunction
     assign rhs2 = operand_b_i[15: 0];
 
     logic [31:0]  polymul0, polymul1, polymul2;
-    ibex_poly16_mul mul16_ins0(lhs0, rhs0, polymul0);
-    ibex_poly16_mul mul16_ins1(lhs1, rhs1, polymul1);
-    ibex_poly16_mul mul16_ins2(lhs2, rhs2, polymul2);
+    ibex_poly16_mul mul16_ins0(.a(lhs0), .b(rhs0), .r(polymul0));
+    ibex_poly16_mul mul16_ins1(.a(lhs1), .b(rhs1), .r(polymul1));
+    ibex_poly16_mul mul16_ins2(.a(lhs2), .b(rhs2), .r(polymul2));
 
     logic [31:0] wclmull, wclmulh, clmulm;
     assign clmulm  = polymul1 ^ polymul2;
@@ -219,7 +219,7 @@ endfunction
                         {32{clmulh_sel}} & wclmulh |
                         {32{xperm8_sel}} & wxperm8 |
                         {32{xperm4_sel}} & wxperm4 ;
-  end else begin : no_gen_zkb
+  end else begin : gen_no_zkb
     assign zkb_val    =  1'b0;
     assign zkb_result = 32'd0;
   end
@@ -301,10 +301,14 @@ endfunction
 
 
     logic[31:0]  sha256_sum0, sha256_sum1, sha256_sig0, sha256_sig1;
-    assign sha256_sig0  = `RORI32(operand_a_i, 7) ^ `RORI32(operand_a_i,18) ^ `SRLI32(operand_a_i, 3);
-    assign sha256_sig1  = `RORI32(operand_a_i,17) ^ `RORI32(operand_a_i,19) ^ `SRLI32(operand_a_i,10);
-    assign sha256_sum0  = `RORI32(operand_a_i, 2) ^ `RORI32(operand_a_i,13) ^ `RORI32(operand_a_i,22);
-    assign sha256_sum1  = `RORI32(operand_a_i, 6) ^ `RORI32(operand_a_i,11) ^ `RORI32(operand_a_i,25);
+    assign sha256_sig0 = `RORI32(operand_a_i, 7) ^ `RORI32(operand_a_i,18) ^
+                         `SRLI32(operand_a_i, 3);
+    assign sha256_sig1 = `RORI32(operand_a_i,17) ^ `RORI32(operand_a_i,19) ^
+                         `SRLI32(operand_a_i,10);
+    assign sha256_sum0 = `RORI32(operand_a_i, 2) ^ `RORI32(operand_a_i,13) ^
+                         `RORI32(operand_a_i,22);
+    assign sha256_sum1 = `RORI32(operand_a_i, 6) ^ `RORI32(operand_a_i,11) ^
+                         `RORI32(operand_a_i,25);
 
     logic[31:0]  sha512_sum0r, sha512_sum1r;
     logic[31:0]  sha512_sig0l, sha512_sig1l;
@@ -337,7 +341,7 @@ endfunction
                           {32{sha512_sig0h_sel}} & sha512_sig0h |
                           {32{sha512_sig1l_sel}} & sha512_sig1l |
                           {32{sha512_sig1h_sel}} & sha512_sig1h ;
-  end else begin : no_gen_zkn
+  end else begin : gen_no_zkn
     assign zkn_val    =  1'b0;
     assign zkn_result = 32'd0;
   end
@@ -401,7 +405,7 @@ endfunction
                         {32{sm4ks_sel}} & sm4    |
                         {32{sm3p0_sel}} & sm3_p0 |
                         {32{sm3p1_sel}} & sm3_p1 ;
-  end else begin : no_gen_zks
+  end else begin : gen_no_zks
     assign zks_val    =  1'b0;
     assign zks_result = 32'd0;
   end

From c66cfbc569016b4c95cea4875999031364a6950d Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Thu, 2 Dec 2021 23:06:03 +0000
Subject: [PATCH 15/21] ZK: fix a define-style issue of RV32Zk in
 riscv_compliance test.

---
 dv/riscv_compliance/rtl/ibex_riscv_compliance.sv | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv b/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv
index b85098cc94..8c3b1482b1 100644
--- a/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv
+++ b/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv
@@ -11,10 +11,6 @@
  * clock).
  */
 
-`ifndef RV32Zk
-  `define RV32Zk ibex_pkg::RV32ZkNone
-`endif
-
 module ibex_riscv_compliance (
   input IO_CLK,
   input IO_RST_N
@@ -26,7 +22,7 @@ module ibex_riscv_compliance (
   parameter bit RV32E                   = 1'b0;
   parameter ibex_pkg::rv32m_e RV32M     = ibex_pkg::RV32MFast;
   parameter ibex_pkg::rv32b_e RV32B     = ibex_pkg::RV32BNone;
-  parameter ibex_pkg::rv32zk_e RV32Zk   = `RV32Zk;
+  parameter ibex_pkg::rv32zk_e RV32Zk   = ibex_pkg::RV32ZkNone;
 
   parameter ibex_pkg::regfile_e RegFile = ibex_pkg::RegFileFF;
   parameter bit BranchTargetALU         = 1'b0;

From cd015f611e66ac39361d060a5a1746b385db9a4b Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Thu, 2 Dec 2021 23:15:13 +0000
Subject: [PATCH 16/21] ZK: fix a define-style issue of RV32Zk.

---
 examples/simple_system/rtl/ibex_simple_system.sv | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/simple_system/rtl/ibex_simple_system.sv b/examples/simple_system/rtl/ibex_simple_system.sv
index ae9540ad03..3f616f23a4 100644
--- a/examples/simple_system/rtl/ibex_simple_system.sv
+++ b/examples/simple_system/rtl/ibex_simple_system.sv
@@ -14,8 +14,8 @@
   `define RV32B ibex_pkg::RV32BNone
 `endif
 
-`ifndef RV32Zk
-  `define RV32Zk ibex_pkg::RV32ZkNone
+`ifndef RV32ZK
+  `define RV32ZK ibex_pkg::RV32ZkNone
 `endif
 
 `ifndef RegFile
@@ -46,7 +46,7 @@ module ibex_simple_system (
   parameter bit                 RV32E                    = 1'b0;
   parameter ibex_pkg::rv32m_e   RV32M                    = `RV32M;
   parameter ibex_pkg::rv32b_e   RV32B                    = `RV32B;
-  parameter ibex_pkg::rv32zk_e  RV32Zk                   = `RV32Zk;
+  parameter ibex_pkg::rv32zk_e  RV32Zk                   = `RV32ZK;
   parameter ibex_pkg::regfile_e RegFile                  = `RegFile;
   parameter bit                 BranchTargetALU          = 1'b0;
   parameter bit                 WritebackStage           = 1'b0;

From f3d43d0b172eaee8e734108fb26089f07c5a4733 Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Fri, 3 Dec 2021 01:39:13 +0000
Subject: [PATCH 17/21] ZK: fix the rtl code style issues

---
 dv/riscv_compliance/rtl/ibex_riscv_compliance.sv |  2 +-
 examples/simple_system/rtl/ibex_simple_system.sv |  6 +++---
 rtl/ibex_decoder.sv                              | 14 ++++++++------
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv b/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv
index 8c3b1482b1..360b6ff782 100644
--- a/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv
+++ b/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv
@@ -123,7 +123,7 @@ module ibex_riscv_compliance (
       .RV32E           (RV32E           ),
       .RV32M           (RV32M           ),
       .RV32B           (RV32B           ),
-      .RV32Zk          ( RV32Zk         ),
+      .RV32Zk          (RV32Zk          ),
       .RegFile         (RegFile         ),
       .BranchTargetALU (BranchTargetALU ),
       .WritebackStage  (WritebackStage  ),
diff --git a/examples/simple_system/rtl/ibex_simple_system.sv b/examples/simple_system/rtl/ibex_simple_system.sv
index 3f616f23a4..ae9540ad03 100644
--- a/examples/simple_system/rtl/ibex_simple_system.sv
+++ b/examples/simple_system/rtl/ibex_simple_system.sv
@@ -14,8 +14,8 @@
   `define RV32B ibex_pkg::RV32BNone
 `endif
 
-`ifndef RV32ZK
-  `define RV32ZK ibex_pkg::RV32ZkNone
+`ifndef RV32Zk
+  `define RV32Zk ibex_pkg::RV32ZkNone
 `endif
 
 `ifndef RegFile
@@ -46,7 +46,7 @@ module ibex_simple_system (
   parameter bit                 RV32E                    = 1'b0;
   parameter ibex_pkg::rv32m_e   RV32M                    = `RV32M;
   parameter ibex_pkg::rv32b_e   RV32B                    = `RV32B;
-  parameter ibex_pkg::rv32zk_e  RV32Zk                   = `RV32ZK;
+  parameter ibex_pkg::rv32zk_e  RV32Zk                   = `RV32Zk;
   parameter ibex_pkg::regfile_e RegFile                  = `RegFile;
   parameter bit                 BranchTargetALU          = 1'b0;
   parameter bit                 WritebackStage           = 1'b0;
diff --git a/rtl/ibex_decoder.sv b/rtl/ibex_decoder.sv
index 07994e64ee..7f9dd796dc 100644
--- a/rtl/ibex_decoder.sv
+++ b/rtl/ibex_decoder.sv
@@ -868,7 +868,7 @@ module ibex_decoder #(
             if (RV32B != RV32BNone) begin
               unique case (instr_alu[31:27])
                 5'b0_0000: alu_operator_o = ALU_SLL;    // Shift Left Logical by Immediate
-                5'b0_0100: alu_operator_o = ALU_SLO  ;  // Shift Left Ones by Immediate
+                5'b0_0100: alu_operator_o = ALU_SLO;    // Shift Left Ones by Immediate
                 5'b0_1001: alu_operator_o = ALU_SBCLR;  // Clear bit specified by immediate
                 5'b0_0101: alu_operator_o = ALU_SBSET;  // Set bit specified by immediate
                 5'b0_1101: alu_operator_o = ALU_SBINV;  // Invert bit specified by immediate.
@@ -877,9 +877,9 @@ module ibex_decoder #(
                 5'b0_0001: if (instr_alu[26] == 0) alu_operator_o = ALU_SHFL;
                 5'b0_1100: begin
                   unique case (instr_alu[26:20])
-                    7'b000_0000: alu_operator_o = ALU_CLZ  ; // clz
-                    7'b000_0001: alu_operator_o = ALU_CTZ  ; // ctz
-                    7'b000_0010: alu_operator_o = ALU_PCNT ; // pcnt
+                    7'b000_0000: alu_operator_o = ALU_CLZ;   // clz
+                    7'b000_0001: alu_operator_o = ALU_CTZ;   // ctz
+                    7'b000_0010: alu_operator_o = ALU_PCNT;  // pcnt
                     7'b000_0100: alu_operator_o = ALU_SEXTB; // sext.b
                     7'b000_0101: alu_operator_o = ALU_SEXTH; // sext.h
                     7'b001_0000: if (RV32B == RV32BFull) begin
@@ -910,7 +910,7 @@ module ibex_decoder #(
                   endcase
                 end
 
-                default: alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate
+                default: ;
               endcase
             end else if (RV32Zk != RV32ZkNone) begin
               unique case (instr_alu[31:27])
@@ -932,8 +932,10 @@ module ibex_decoder #(
                     default:     alu_operator_o = ALU_SLL;
                   endcase
                 end
-              default: alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate
+              default: alu_operator_o = ALU_SLL;
               endcase
+            end else begin
+              alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate
             end
           end
 

From 3a1eb7c62fcab217468b2c0e494e432141fad38e Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Fri, 3 Dec 2021 11:21:28 +0000
Subject: [PATCH 18/21] ZK: stick to the macro name style rule, change RV32Zk
 -> RV32K.

---
 .../ibex_riscv_compliance.core                |  6 +++---
 .../rtl/ibex_riscv_compliance.sv              |  4 ++--
 .../ibex_simple_system_cosim.core             |  6 +++---
 .../simple_system/ibex_simple_system.core     |  6 +++---
 .../simple_system/rtl/ibex_simple_system.sv   |  8 ++++----
 ibex_configs.yaml                             | 20 +++++++++----------
 ibex_core.core                                |  4 ++--
 ibex_top_tracing.core                         |  6 +++---
 rtl/ibex_core.sv                              |  7 +++----
 rtl/ibex_lockstep.sv                          |  4 ++--
 rtl/ibex_top.sv                               |  6 +++---
 rtl/ibex_top_tracing.sv                       |  4 ++--
 12 files changed, 40 insertions(+), 41 deletions(-)

diff --git a/dv/riscv_compliance/ibex_riscv_compliance.core b/dv/riscv_compliance/ibex_riscv_compliance.core
index 74be84b0a8..87012c1fc1 100644
--- a/dv/riscv_compliance/ibex_riscv_compliance.core
+++ b/dv/riscv_compliance/ibex_riscv_compliance.core
@@ -42,11 +42,11 @@ parameters:
     paramtype: vlogdefine
     description: "Bitmanip implementation parameter enum. See the ibex_pkg::rv32b_e enum in ibex_pkg.sv for permitted values."
 
-  RV32Zk:
+  RV32K:
     datatype: str
     default: ibex_pkg::RV32ZkNone
     paramtype: vlogdefine
-    description: "Zk cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values."
+    description: "Cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values."
 
   RegFile:
     datatype: str
@@ -118,7 +118,7 @@ targets:
       - RV32E
       - RV32M
       - RV32B
-      - RV32Zk
+      - RV32K
       - RegFile
       - ICache
       - ICacheECC
diff --git a/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv b/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv
index 360b6ff782..e55fd0f925 100644
--- a/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv
+++ b/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv
@@ -22,7 +22,7 @@ module ibex_riscv_compliance (
   parameter bit RV32E                   = 1'b0;
   parameter ibex_pkg::rv32m_e RV32M     = ibex_pkg::RV32MFast;
   parameter ibex_pkg::rv32b_e RV32B     = ibex_pkg::RV32BNone;
-  parameter ibex_pkg::rv32zk_e RV32Zk   = ibex_pkg::RV32ZkNone;
+  parameter ibex_pkg::rv32zk_e RV32K    = ibex_pkg::RV32ZkNone;
 
   parameter ibex_pkg::regfile_e RegFile = ibex_pkg::RegFileFF;
   parameter bit BranchTargetALU         = 1'b0;
@@ -123,7 +123,7 @@ module ibex_riscv_compliance (
       .RV32E           (RV32E           ),
       .RV32M           (RV32M           ),
       .RV32B           (RV32B           ),
-      .RV32Zk          (RV32Zk          ),
+      .RV32K           (RV32K           ),
       .RegFile         (RegFile         ),
       .BranchTargetALU (BranchTargetALU ),
       .WritebackStage  (WritebackStage  ),
diff --git a/dv/verilator/simple_system_cosim/ibex_simple_system_cosim.core b/dv/verilator/simple_system_cosim/ibex_simple_system_cosim.core
index 8429602407..29710a2a2a 100644
--- a/dv/verilator/simple_system_cosim/ibex_simple_system_cosim.core
+++ b/dv/verilator/simple_system_cosim/ibex_simple_system_cosim.core
@@ -35,11 +35,11 @@ parameters:
     paramtype: vlogdefine
     description: "Bitmanip implementation parameter enum. See the ibex_pkg::rv32b_e enum in ibex_pkg.sv for permitted values."
 
-  RV32Zk:
+  RV32K:
     datatype: str
     default: ibex_pkg::RV32ZkNone
     paramtype: vlogdefine
-    description: "Zk cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values."
+    description: "Cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values."
 
   RegFile:
     datatype: str
@@ -115,7 +115,7 @@ targets:
       - RV32E
       - RV32M
       - RV32B
-      - RV32Zk
+      - RV32K
       - RegFile
       - ICache
       - ICacheECC
diff --git a/examples/simple_system/ibex_simple_system.core b/examples/simple_system/ibex_simple_system.core
index ba0ceb8039..5b633a2775 100644
--- a/examples/simple_system/ibex_simple_system.core
+++ b/examples/simple_system/ibex_simple_system.core
@@ -31,11 +31,11 @@ parameters:
     paramtype: vlogdefine
     description: "Bitmanip implementation parameter enum. See the ibex_pkg::rv32b_e enum in ibex_pkg.sv for permitted values."
 
-  RV32Zk:
+  RV32K:
     datatype: str
     default: ibex_pkg::RV32ZkNone
     paramtype: vlogdefine
-    description: "Zk cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values."
+    description: "Cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values."
 
   RegFile:
     datatype: str
@@ -111,7 +111,7 @@ targets:
       - RV32E
       - RV32M
       - RV32B
-      - RV32Zk
+      - RV32K
       - RegFile
       - ICache
       - ICacheECC
diff --git a/examples/simple_system/rtl/ibex_simple_system.sv b/examples/simple_system/rtl/ibex_simple_system.sv
index ae9540ad03..4f17ba01ae 100644
--- a/examples/simple_system/rtl/ibex_simple_system.sv
+++ b/examples/simple_system/rtl/ibex_simple_system.sv
@@ -14,8 +14,8 @@
   `define RV32B ibex_pkg::RV32BNone
 `endif
 
-`ifndef RV32Zk
-  `define RV32Zk ibex_pkg::RV32ZkNone
+`ifndef RV32K
+  `define RV32K ibex_pkg::RV32ZkNone
 `endif
 
 `ifndef RegFile
@@ -46,7 +46,7 @@ module ibex_simple_system (
   parameter bit                 RV32E                    = 1'b0;
   parameter ibex_pkg::rv32m_e   RV32M                    = `RV32M;
   parameter ibex_pkg::rv32b_e   RV32B                    = `RV32B;
-  parameter ibex_pkg::rv32zk_e  RV32Zk                   = `RV32Zk;
+  parameter ibex_pkg::rv32zk_e  RV32K                    = `RV32K;
   parameter ibex_pkg::regfile_e RegFile                  = `RegFile;
   parameter bit                 BranchTargetALU          = 1'b0;
   parameter bit                 WritebackStage           = 1'b0;
@@ -175,7 +175,7 @@ module ibex_simple_system (
       .RV32E           ( RV32E           ),
       .RV32M           ( RV32M           ),
       .RV32B           ( RV32B           ),
-      .RV32Zk          ( RV32Zk          ),
+      .RV32K           ( RV32K           ),
       .RegFile         ( RegFile         ),
       .BranchTargetALU ( BranchTargetALU ),
       .ICache          ( ICache          ),
diff --git a/ibex_configs.yaml b/ibex_configs.yaml
index 16407922d5..543804ad40 100644
--- a/ibex_configs.yaml
+++ b/ibex_configs.yaml
@@ -11,7 +11,7 @@ small:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MFast"
   RV32B                    : "ibex_pkg::RV32BNone"
-  RV32Zk                   : "ibex_pkg::RV32ZkNone"
+  RV32K                    : "ibex_pkg::RV32ZkNone"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 0
   WritebackStage           : 0
@@ -28,7 +28,7 @@ opentitan:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
   RV32B                    : "ibex_pkg::RV32BNone"
-  RV32Zk                   : "ibex_pkg::RV32ZkNone"
+  RV32K                    : "ibex_pkg::RV32ZkNone"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
   WritebackStage           : 1
@@ -51,7 +51,7 @@ experimental-maxperf:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
   RV32B                    : "ibex_pkg::RV32BNone"
-  RV32Zk                   : "ibex_pkg::RV32ZkNone"
+  RV32K                    : "ibex_pkg::RV32ZkNone"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
   WritebackStage           : 1
@@ -68,7 +68,7 @@ experimental-maxperf-pmp:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
   RV32B                    : "ibex_pkg::RV32BNone"
-  RV32Zk                   : "ibex_pkg::RV32ZkNone"
+  RV32K                    : "ibex_pkg::RV32ZkNone"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
   WritebackStage           : 1
@@ -85,7 +85,7 @@ experimental-maxperf-pmp-bmbalanced:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
   RV32B                    : "ibex_pkg::RV32BBalanced"
-  RV32Zk                   : "ibex_pkg::RV32ZkNone"
+  RV32K                    : "ibex_pkg::RV32ZkNone"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
   WritebackStage           : 1
@@ -102,7 +102,7 @@ experimental-maxperf-pmp-bmfull:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
   RV32B                    : "ibex_pkg::RV32BFull"
-  RV32Zk                   : "ibex_pkg::RV32ZkNone"
+  RV32K                    : "ibex_pkg::RV32ZkNone"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
   WritebackStage           : 1
@@ -119,7 +119,7 @@ experimental-maxperf-pmp-zkn:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
   RV32B                    : "ibex_pkg::RV32BNone"
-  RV32Zk                   : "ibex_pkg::RV32Zkn"
+  RV32K                    : "ibex_pkg::RV32Zkn"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
   WritebackStage           : 1
@@ -136,7 +136,7 @@ experimental-maxperf-pmp-zks:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
   RV32B                    : "ibex_pkg::RV32BNone"
-  RV32Zk                   : "ibex_pkg::RV32Zks"
+  RV32K                    : "ibex_pkg::RV32Zks"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
   WritebackStage           : 1
@@ -154,7 +154,7 @@ experimental-maxperf-pmp-bmfull-icache:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
   RV32B                    : "ibex_pkg::RV32BFull"
-  RV32Zk                   : "ibex_pkg::RV32ZkNone"
+  RV32K                    : "ibex_pkg::RV32ZkNone"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
   WritebackStage           : 1
@@ -174,7 +174,7 @@ experimental-branch-predictor:
   RV32E                    : 0
   RV32M                    : "ibex_pkg::RV32MSingleCycle"
   RV32B                    : "ibex_pkg::RV32BNone"
-  RV32Zk                   : "ibex_pkg::RV32ZkNone"
+  RV32K                    : "ibex_pkg::RV32ZkNone"
   RegFile                  : "ibex_pkg::RegFileFF"
   BranchTargetALU          : 1
   WritebackStage           : 1
diff --git a/ibex_core.core b/ibex_core.core
index b43baae164..c84f68368a 100644
--- a/ibex_core.core
+++ b/ibex_core.core
@@ -85,11 +85,11 @@ parameters:
     paramtype: vlogdefine
     description: "Bitmanip implementation parameter enum. See the ibex_pkg::rv32b_e enum in ibex_pkg.sv for permitted values."
 
-  RV32Zk:
+  RV32K:
     datatype: str
     default: ibex_pkg::RV32ZkNone
     paramtype: vlogdefine
-    description: "Zk cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values."
+    description: "Cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values."
 
   RegFile:
     datatype: str
diff --git a/ibex_top_tracing.core b/ibex_top_tracing.core
index 1cff3fce4b..92c751226f 100644
--- a/ibex_top_tracing.core
+++ b/ibex_top_tracing.core
@@ -41,11 +41,11 @@ parameters:
     paramtype: vlogdefine
     description: "Bitmanip implementation parameter enum. See the ibex_pkg::rv32b_e enum in ibex_pkg.sv for permitted values."
 
-  RV32Zk:
+  RV32K:
     datatype: str
     default: ibex_pkg::RV32ZkNone
     paramtype: vlogdefine
-    description: "Zk cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values."
+    description: "Cryptography extension implementation parameter enum. See the ibex_pkg::rv32zk_e enum in ibex_pkg.sv for permitted values."
 
   RegFile:
     datatype: str
@@ -123,7 +123,7 @@ targets:
       - RV32E
       - RV32M
       - RV32B
-      - RV32Zk
+      - RV32K
       - RegFile
       - ICache
       - ICacheECC
diff --git a/rtl/ibex_core.sv b/rtl/ibex_core.sv
index 27bd7aa80b..309cd3bc1b 100644
--- a/rtl/ibex_core.sv
+++ b/rtl/ibex_core.sv
@@ -21,7 +21,7 @@ module ibex_core import ibex_pkg::*; #(
   parameter bit          RV32E             = 1'b0,
   parameter rv32m_e      RV32M             = RV32MFast,
   parameter rv32b_e      RV32B             = RV32BNone,
-  parameter rv32zk_e     RV32Zk            = RV32ZkNone,
+  parameter rv32zk_e     RV32K             = RV32ZkNone,
   parameter bit          BranchTargetALU   = 1'b0,
   parameter bit          WritebackStage    = 1'b0,
   parameter bit          ICache            = 1'b0,
@@ -469,7 +469,7 @@ module ibex_core import ibex_pkg::*; #(
     .RV32E          (RV32E),
     .RV32M          (RV32M),
     .RV32B          (RV32B),
-    .RV32Zk         (RV32Zk),
+    .RV32Zk         (RV32K),
     .BranchTargetALU(BranchTargetALU),
     .DataIndTiming  (DataIndTiming),
     .SpecBranch     (SpecBranch),
@@ -629,7 +629,7 @@ module ibex_core import ibex_pkg::*; #(
   ibex_ex_block #(
     .RV32M          (RV32M),
     .RV32B          (RV32B),
-    .RV32Zk         (RV32Zk),
+    .RV32Zk         (RV32K),
     .BranchTargetALU(BranchTargetALU)
   ) ex_block_i (
     .clk_i (clk_i),
@@ -914,7 +914,6 @@ module ibex_core import ibex_pkg::*; #(
     .RV32E            (RV32E),
     .RV32M            (RV32M),
     .RV32B            (RV32B)
-//    .RV32Zk           (RV32Zk)
   ) cs_registers_i (
     .clk_i (clk_i),
     .rst_ni(rst_ni),
diff --git a/rtl/ibex_lockstep.sv b/rtl/ibex_lockstep.sv
index 54b8cddb8e..846b2d04eb 100644
--- a/rtl/ibex_lockstep.sv
+++ b/rtl/ibex_lockstep.sv
@@ -16,7 +16,7 @@ module ibex_lockstep import ibex_pkg::*; #(
   parameter bit          RV32E             = 1'b0,
   parameter rv32m_e      RV32M             = RV32MFast,
   parameter rv32b_e      RV32B             = RV32BNone,
-  parameter rv32zk_e     RV32Zk            = RV32ZkNone,
+  parameter rv32zk_e     RV32K             = RV32ZkNone,
   parameter bit          BranchTargetALU   = 1'b0,
   parameter bit          WritebackStage    = 1'b0,
   parameter bit          ICache            = 1'b0,
@@ -324,7 +324,7 @@ module ibex_lockstep import ibex_pkg::*; #(
     .RV32E             ( RV32E             ),
     .RV32M             ( RV32M             ),
     .RV32B             ( RV32B             ),
-    .RV32Zk            ( RV32Zk            ),
+    .RV32K             ( RV32K            ),
     .BranchTargetALU   ( BranchTargetALU   ),
     .ICache            ( ICache            ),
     .ICacheECC         ( ICacheECC         ),
diff --git a/rtl/ibex_top.sv b/rtl/ibex_top.sv
index 42670a6ae0..a2e30b7b90 100644
--- a/rtl/ibex_top.sv
+++ b/rtl/ibex_top.sv
@@ -21,7 +21,7 @@ module ibex_top import ibex_pkg::*; #(
   parameter bit          RV32E            = 1'b0,
   parameter rv32m_e      RV32M            = RV32MFast,
   parameter rv32b_e      RV32B            = RV32BNone,
-  parameter rv32zk_e     RV32Zk           = RV32ZkNone,
+  parameter rv32zk_e     RV32K            = RV32ZkNone,
   parameter regfile_e    RegFile          = RegFileFF,
   parameter bit          BranchTargetALU  = 1'b0,
   parameter bit          WritebackStage   = 1'b0,
@@ -196,7 +196,7 @@ module ibex_top import ibex_pkg::*; #(
     .RV32E            (RV32E),
     .RV32M            (RV32M),
     .RV32B            (RV32B),
-    .RV32Zk           (RV32Zk),
+    .RV32K            (RV32K),
     .BranchTargetALU  (BranchTargetALU),
     .ICache           (ICache),
     .ICacheECC        (ICacheECC),
@@ -668,7 +668,7 @@ module ibex_top import ibex_pkg::*; #(
       .RV32E            (RV32E),
       .RV32M            (RV32M),
       .RV32B            (RV32B),
-      .RV32Zk           (RV32Zk),
+      .RV32K            (RV32K),
       .BranchTargetALU  (BranchTargetALU),
       .ICache           (ICache),
       .ICacheECC        (ICacheECC),
diff --git a/rtl/ibex_top_tracing.sv b/rtl/ibex_top_tracing.sv
index c34cf1b99b..736e4e5790 100644
--- a/rtl/ibex_top_tracing.sv
+++ b/rtl/ibex_top_tracing.sv
@@ -15,7 +15,7 @@ module ibex_top_tracing import ibex_pkg::*; #(
   parameter bit          RV32E            = 1'b0,
   parameter rv32m_e      RV32M            = RV32MFast,
   parameter rv32b_e      RV32B            = RV32BNone,
-  parameter rv32zk_e     RV32Zk           = RV32ZkNone,
+  parameter rv32zk_e     RV32K            = RV32ZkNone,
   parameter regfile_e    RegFile          = RegFileFF,
   parameter bit          BranchTargetALU  = 1'b0,
   parameter bit          WritebackStage   = 1'b0,
@@ -137,7 +137,7 @@ module ibex_top_tracing import ibex_pkg::*; #(
     .RV32E            ( RV32E            ),
     .RV32M            ( RV32M            ),
     .RV32B            ( RV32B            ),
-    .RV32Zk           ( RV32Zk            ),
+    .RV32K            ( RV32K            ),
     .RegFile          ( RegFile          ),
     .BranchTargetALU  ( BranchTargetALU  ),
     .ICache           ( ICache           ),

From 4990ce7c1db0ecb2143bf7ac08019c85b59eaa87 Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Tue, 7 Dec 2021 17:05:48 +0000
Subject: [PATCH 19/21] ZK: fix some changes in rtl from upstream merging.

---
 .../rtl/ibex_riscv_compliance.sv              |   5 +-
 rtl/ibex_aes_sbox.sv                          | 384 +++++++++---------
 rtl/ibex_decoder.sv                           |  50 +--
 rtl/ibex_sm4_sbox.sv                          | 260 ++++++------
 rtl/ibex_zk.sv                                |  52 +--
 5 files changed, 378 insertions(+), 373 deletions(-)

diff --git a/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv b/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv
index e55fd0f925..bf87c078b6 100644
--- a/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv
+++ b/dv/riscv_compliance/rtl/ibex_riscv_compliance.sv
@@ -10,6 +10,9 @@
  * simulators (if the top-level clk and rst ports are replaced with a generated
  * clock).
  */
+ `ifndef RV32K
+   `define RV32K ibex_pkg::RV32ZkNone
+ `endif
 
 module ibex_riscv_compliance (
   input IO_CLK,
@@ -22,7 +25,7 @@ module ibex_riscv_compliance (
   parameter bit RV32E                   = 1'b0;
   parameter ibex_pkg::rv32m_e RV32M     = ibex_pkg::RV32MFast;
   parameter ibex_pkg::rv32b_e RV32B     = ibex_pkg::RV32BNone;
-  parameter ibex_pkg::rv32zk_e RV32K    = ibex_pkg::RV32ZkNone;
+  parameter ibex_pkg::rv32zk_e RV32K    = `RV32K;
 
   parameter ibex_pkg::regfile_e RegFile = ibex_pkg::RegFileFF;
   parameter bit BranchTargetALU         = 1'b0;
diff --git a/rtl/ibex_aes_sbox.sv b/rtl/ibex_aes_sbox.sv
index 36e23ce0ff..ebd18a4489 100644
--- a/rtl/ibex_aes_sbox.sv
+++ b/rtl/ibex_aes_sbox.sv
@@ -28,33 +28,33 @@ function automatic logic [20:0] aes_sbox_top(logic [7:0] x);
     logic  y9,  y8,  y7,  y6,  y5,  y4,  y3,  y2,  y1,  y0 ;
     logic  t5,  t4,  t3,  t2,  t1,  t0 ;
 
-    assign y0    = x[ 0] ;
-    assign y1    = x[ 7] ^     x[ 4];
-    assign y2    = x[ 7] ^     x[ 2];
-    assign y3    = x[ 7] ^     x[ 1];
-    assign y4    = x[ 4] ^     x[ 2];
-    assign t0    = x[ 3] ^     x[ 1];
-    assign y5    = y1    ^     t0   ;
-    assign t1    = x[ 6] ^     x[ 5];
-    assign y6    = x[ 0] ^     y5   ;
-    assign y7    = x[ 0] ^     t1   ;
-    assign y8    = y5    ^     t1   ;
-    assign t2    = x[ 6] ^     x[ 2];
-    assign t3    = x[ 5] ^     x[ 2];
-    assign y9    = y3    ^     y4   ;
-    assign y10   = y5    ^     t2   ;
-    assign y11   = t0    ^     t2   ;
-    assign y12   = t0    ^     t3   ;
-    assign y13   = y7    ^     y12  ;
-    assign t4    = x[ 4] ^     x[ 0];
-    assign y14   = t1    ^     t4   ;
-    assign y15   = y1    ^     y14  ;
-    assign t5    = x[ 1] ^     x[ 0];
-    assign y16   = t1    ^     t5   ;
-    assign y17   = y2    ^     y16  ;
-    assign y18   = y2    ^     y8   ;
-    assign y19   = y15   ^     y13  ;
-    assign y20   = y1    ^     t3   ;
+    y0    = x[ 0] ;
+    y1    = x[ 7] ^     x[ 4];
+    y2    = x[ 7] ^     x[ 2];
+    y3    = x[ 7] ^     x[ 1];
+    y4    = x[ 4] ^     x[ 2];
+    t0    = x[ 3] ^     x[ 1];
+    y5    = y1    ^     t0   ;
+    t1    = x[ 6] ^     x[ 5];
+    y6    = x[ 0] ^     y5   ;
+    y7    = x[ 0] ^     t1   ;
+    y8    = y5    ^     t1   ;
+    t2    = x[ 6] ^     x[ 2];
+    t3    = x[ 5] ^     x[ 2];
+    y9    = y3    ^     y4   ;
+    y10   = y5    ^     t2   ;
+    y11   = t0    ^     t2   ;
+    y12   = t0    ^     t3   ;
+    y13   = y7    ^     y12  ;
+    t4    = x[ 4] ^     x[ 0];
+    y14   = t1    ^     t4   ;
+    y15   = y1    ^     y14  ;
+    t5    = x[ 1] ^     x[ 0];
+    y16   = t1    ^     t5   ;
+    y17   = y2    ^     y16  ;
+    y18   = y2    ^     y8   ;
+    y19   = y15   ^     y13  ;
+    y20   = y1    ^     t3   ;
 
     return {y20, y19, y18, y17, y16, y15, y14, y13, y12, y11,
             y10, y9,  y8,  y7,  y6,  y5,  y4,  y3,  y2,  y1,  y0};
@@ -66,44 +66,44 @@ function automatic logic [7:0] aes_sbox_out(logic [17:0] x);
     logic  t29, t28, t27, t26, t25, t24, t23, t22, t21, t20;
     logic  t19, t18, t17, t16, t15, t14, t13, t12, t11, t10;
     logic  t9,  t8,  t7,  t6,  t5,  t4,  t3,  t2,  t1,  t0 ;
-    assign t0   = x[11] ^  x[12];
-    assign t1   = x[0] ^   x[6];
-    assign t2   = x[14] ^  x[16];
-    assign t3   = x[15] ^  x[5];
-    assign t4   = x[4] ^   x[8];
-    assign t5   = x[17] ^  x[11];
-    assign t6   = x[12] ^  t5;
-    assign t7   = x[14] ^  t3;
-    assign t8   = x[1] ^   x[9];
-    assign t9   = x[2] ^   x[3];
-    assign t10  = x[3] ^   t4;
-    assign t11  = x[10] ^  t2;
-    assign t12  = x[16] ^  x[1];
-    assign t13  = x[0] ^   t0;
-    assign t14  = x[2] ^   x[11];
-    assign t15  = x[5] ^   t1;
-    assign t16  = x[6] ^   t0;
-    assign t17  = x[7] ^   t1;
-    assign t18  = x[8] ^   t8;
-    assign t19  = x[13] ^  t4;
-    assign t20  = t0 ^     t1;
-    assign t21  = t1 ^     t7;
-    assign t22  = t3 ^     t12;
-    assign t23  = t18 ^    t2;
-    assign t24  = t15 ^    t9;
-    assign t25  = t6 ^     t10;
-    assign t26  = t7 ^     t9;
-    assign t27  = t8 ^     t10;
-    assign t28  = t11 ^    t14;
-    assign t29  = t11 ^    t17;
-    assign  y[0] = t6 ^~  t23;
-    assign  y[1] = t13 ^~ t27;
-    assign  y[2] = t25 ^  t29;
-    assign  y[3] = t20 ^  t22;
-    assign  y[4] = t6 ^   t21;
-    assign  y[5] = t19 ^~ t28;
-    assign  y[6] = t16 ^~ t26;
-    assign  y[7] = t6 ^   t24;
+    t0   = x[11] ^  x[12];
+    t1   = x[0] ^   x[6];
+    t2   = x[14] ^  x[16];
+    t3   = x[15] ^  x[5];
+    t4   = x[4] ^   x[8];
+    t5   = x[17] ^  x[11];
+    t6   = x[12] ^  t5;
+    t7   = x[14] ^  t3;
+    t8   = x[1] ^   x[9];
+    t9   = x[2] ^   x[3];
+    t10  = x[3] ^   t4;
+    t11  = x[10] ^  t2;
+    t12  = x[16] ^  x[1];
+    t13  = x[0] ^   t0;
+    t14  = x[2] ^   x[11];
+    t15  = x[5] ^   t1;
+    t16  = x[6] ^   t0;
+    t17  = x[7] ^   t1;
+    t18  = x[8] ^   t8;
+    t19  = x[13] ^  t4;
+    t20  = t0 ^     t1;
+    t21  = t1 ^     t7;
+    t22  = t3 ^     t12;
+    t23  = t18 ^    t2;
+    t24  = t15 ^    t9;
+    t25  = t6 ^     t10;
+    t26  = t7 ^     t9;
+    t27  = t8 ^     t10;
+    t28  = t11 ^    t14;
+    t29  = t11 ^    t17;
+    y[0] = t6 ^~  t23;
+    y[1] = t13 ^~ t27;
+    y[2] = t25 ^  t29;
+    y[3] = t20 ^  t22;
+    y[4] = t6 ^   t21;
+    y[5] = t19 ^~ t28;
+    y[6] = t16 ^~ t26;
+    y[7] = t6 ^   t24;
     return  y;
 endfunction
 
@@ -116,71 +116,71 @@ function automatic logic [17:0] aes_sbox_inv_mid(logic [20:0] x);
     logic  t29, t28, t27, t26, t25, t24, t23, t22, t21, t20;
     logic  t19, t18, t17, t16, t15, t14, t13, t12, t11, t10;
     logic  t9,  t8,  t7,  t6,  t5,  t4,  t3,  t2,  t1,  t0 ;
-    assign t0  = x[ 3] ^     x[12];
-    assign t1  = x[ 9] &     x[ 5];
-    assign t2  = x[17] &     x[ 6];
-    assign t3  = x[10] ^     t1   ;
-    assign t4  = x[14] &     x[ 0];
-    assign t5  = t4    ^     t1   ;
-    assign t6  = x[ 3] &     x[12];
-    assign t7  = x[16] &     x[ 7];
-    assign t8  = t0    ^     t6   ;
-    assign t9  = x[15] &     x[13];
-    assign t10 = t9    ^     t6   ;
-    assign t11 = x[ 1] &     x[11];
-    assign t12 = x[ 4] &     x[20];
-    assign t13 = t12   ^     t11  ;
-    assign t14 = x[ 2] &     x[ 8];
-    assign t15 = t14   ^     t11  ;
-    assign t16 = t3    ^     t2   ;
-    assign t17 = t5    ^     x[18];
-    assign t18 = t8    ^     t7   ;
-    assign t19 = t10   ^     t15  ;
-    assign t20 = t16   ^     t13  ;
-    assign t21 = t17   ^     t15  ;
-    assign t22 = t18   ^     t13  ;
-    assign t23 = t19   ^     x[19];
-    assign t24 = t22   ^     t23  ;
-    assign t25 = t22   &     t20  ;
-    assign t26 = t21   ^     t25  ;
-    assign t27 = t20   ^     t21  ;
-    assign t28 = t23   ^     t25  ;
-    assign t29 = t28   &     t27  ;
-    assign t30 = t26   &     t24  ;
-    assign t31 = t20   &     t23  ;
-    assign t32 = t27   &     t31  ;
-    assign t33 = t27   ^     t25  ;
-    assign t34 = t21   &     t22  ;
-    assign t35 = t24   &     t34  ;
-    assign t36 = t24   ^     t25  ;
-    assign t37 = t21   ^     t29  ;
-    assign t38 = t32   ^     t33  ;
-    assign t39 = t23   ^     t30  ;
-    assign t40 = t35   ^     t36  ;
-    assign t41 = t38   ^     t40  ;
-    assign t42 = t37   ^     t39  ;
-    assign t43 = t37   ^     t38  ;
-    assign t44 = t39   ^     t40  ;
-    assign t45 = t42   ^     t41  ;
+    t0  = x[ 3] ^     x[12];
+    t1  = x[ 9] &     x[ 5];
+    t2  = x[17] &     x[ 6];
+    t3  = x[10] ^     t1   ;
+    t4  = x[14] &     x[ 0];
+    t5  = t4    ^     t1   ;
+    t6  = x[ 3] &     x[12];
+    t7  = x[16] &     x[ 7];
+    t8  = t0    ^     t6   ;
+    t9  = x[15] &     x[13];
+    t10 = t9    ^     t6   ;
+    t11 = x[ 1] &     x[11];
+    t12 = x[ 4] &     x[20];
+    t13 = t12   ^     t11  ;
+    t14 = x[ 2] &     x[ 8];
+    t15 = t14   ^     t11  ;
+    t16 = t3    ^     t2   ;
+    t17 = t5    ^     x[18];
+    t18 = t8    ^     t7   ;
+    t19 = t10   ^     t15  ;
+    t20 = t16   ^     t13  ;
+    t21 = t17   ^     t15  ;
+    t22 = t18   ^     t13  ;
+    t23 = t19   ^     x[19];
+    t24 = t22   ^     t23  ;
+    t25 = t22   &     t20  ;
+    t26 = t21   ^     t25  ;
+    t27 = t20   ^     t21  ;
+    t28 = t23   ^     t25  ;
+    t29 = t28   &     t27  ;
+    t30 = t26   &     t24  ;
+    t31 = t20   &     t23  ;
+    t32 = t27   &     t31  ;
+    t33 = t27   ^     t25  ;
+    t34 = t21   &     t22  ;
+    t35 = t24   &     t34  ;
+    t36 = t24   ^     t25  ;
+    t37 = t21   ^     t29  ;
+    t38 = t32   ^     t33  ;
+    t39 = t23   ^     t30  ;
+    t40 = t35   ^     t36  ;
+    t41 = t38   ^     t40  ;
+    t42 = t37   ^     t39  ;
+    t43 = t37   ^     t38  ;
+    t44 = t39   ^     t40  ;
+    t45 = t42   ^     t41  ;
 
-    assign  y[ 0] = t38 &     x[ 7];
-    assign  y[ 1] = t37 &     x[13];
-    assign  y[ 2] = t42 &     x[11];
-    assign  y[ 3] = t45 &     x[20];
-    assign  y[ 4] = t41 &     x[ 8];
-    assign  y[ 5] = t44 &     x[ 9];
-    assign  y[ 6] = t40 &     x[17];
-    assign  y[ 7] = t39 &     x[14];
-    assign  y[ 8] = t43 &     x[ 3];
-    assign  y[ 9] = t38 &     x[16];
-    assign  y[10] = t37 &     x[15];
-    assign  y[11] = t42 &     x[ 1];
-    assign  y[12] = t45 &     x[ 4];
-    assign  y[13] = t41 &     x[ 2];
-    assign  y[14] = t44 &     x[ 5];
-    assign  y[15] = t40 &     x[ 6];
-    assign  y[16] = t39 &     x[ 0];
-    assign  y[17] = t43 &     x[12];
+    y[ 0] = t38 &     x[ 7];
+    y[ 1] = t37 &     x[13];
+    y[ 2] = t42 &     x[11];
+    y[ 3] = t45 &     x[20];
+    y[ 4] = t41 &     x[ 8];
+    y[ 5] = t44 &     x[ 9];
+    y[ 6] = t40 &     x[17];
+    y[ 7] = t39 &     x[14];
+    y[ 8] = t43 &     x[ 3];
+    y[ 9] = t38 &     x[16];
+    y[10] = t37 &     x[15];
+    y[11] = t42 &     x[ 1];
+    y[12] = t45 &     x[ 4];
+    y[13] = t41 &     x[ 2];
+    y[14] = t44 &     x[ 5];
+    y[15] = t40 &     x[ 6];
+    y[16] = t39 &     x[ 0];
+    y[17] = t43 &     x[12];
 
     return y;
 endfunction
@@ -191,32 +191,32 @@ function automatic logic [20:0] aes_inv_sbox_top(logic [7:0] x);
     logic  y19, y18, y17, y16, y15, y14, y13, y12, y11, y10;
     logic  y9,  y8,  y7,  y6,  y5,  y4,  y3,  y2,  y1,  y0 ;
     logic  t4,  t3,  t2,  t1,  t0 ;
-    assign y17 = x[ 7] ^     x[ 4];
-    assign y16 = x[ 6] ^~ x[ 4];
-    assign y2  = x[ 7] ^~ x[ 6];
-    assign y1  = x[ 4] ^     x[ 3];
-    assign y18 = x[ 3] ^~ x[ 0];
-    assign t0  = x[ 1] ^     x[ 0];
-    assign y6  = x[ 6] ^~ y17 ;
-    assign y14 = y16  ^     t0;
-    assign y7  = x[ 0] ^~ y1;
-    assign y8  = y2  ^     y18;
-    assign y9  = y2  ^     t0;
-    assign y3  = y1  ^     t0;
-    assign y19 = x[ 5] ^~ y1;
-    assign t1  = x[ 6] ^    x[ 1];
-    assign y13 = x[ 5] ^~ y14;
-    assign y15 = y18  ^     t1;
-    assign y4  = x[ 3] ^     y6;
-    assign t2  = x[ 5] ^~ x[ 2];
-    assign t3  = x[ 2] ^~ x[ 1];
-    assign t4  = x[ 5] ^~ x[ 3];
-    assign y5  = y16  ^     t2 ;
-    assign y12 = t1  ^     t4 ;
-    assign y20 = y1  ^     t3 ;
-    assign y11 = y8  ^     y20 ;
-    assign y10 = y8  ^     t3 ;
-    assign y0  = x[ 7] ^     t2 ;
+    y17 = x[ 7] ^     x[ 4];
+    y16 = x[ 6] ^~ x[ 4];
+    y2  = x[ 7] ^~ x[ 6];
+    y1  = x[ 4] ^     x[ 3];
+    y18 = x[ 3] ^~ x[ 0];
+    t0  = x[ 1] ^     x[ 0];
+    y6  = x[ 6] ^~ y17 ;
+    y14 = y16  ^     t0;
+    y7  = x[ 0] ^~ y1;
+    y8  = y2  ^     y18;
+    y9  = y2  ^     t0;
+    y3  = y1  ^     t0;
+    y19 = x[ 5] ^~ y1;
+    t1  = x[ 6] ^    x[ 1];
+    y13 = x[ 5] ^~ y14;
+    y15 = y18  ^     t1;
+    y4  = x[ 3] ^     y6;
+    t2  = x[ 5] ^~ x[ 2];
+    t3  = x[ 2] ^~ x[ 1];
+    t4  = x[ 5] ^~ x[ 3];
+    y5  = y16  ^     t2 ;
+    y12 = t1  ^     t4 ;
+    y20 = y1  ^     t3 ;
+    y11 = y8  ^     y20 ;
+    y10 = y8  ^     t3 ;
+    y0  = x[ 7] ^     t2 ;
 
     return {y20, y19, y18, y17, y16, y15, y14, y13, y12, y11,
             y10, y9,  y8,  y7,  y6,  y5,  y4,  y3,  y2,  y1,  y0};
@@ -228,44 +228,44 @@ function automatic logic [7:0] aes_inv_sbox_out(logic [17:0] x);
     logic  t29, t28, t27, t26, t25, t24, t23, t22,      t20;
     logic  t19, t18, t17, t16, t15, t14, t13, t12, t11, t10;
     logic  t9,  t8,  t7,  t6,  t5,  t4,  t3,  t2,  t1,  t0 ;
-    assign t0  = x[ 2] ^     x[11];
-    assign t1  = x[ 8] ^     x[ 9];
-    assign t2  = x[ 4] ^     x[12];
-    assign t3  = x[15] ^     x[ 0];
-    assign t4  = x[16] ^     x[ 6];
-    assign t5  = x[14] ^     x[ 1];
-    assign t6  = x[17] ^     x[10];
-    assign t7  = t0    ^     t1   ;
-    assign t8  = x[ 0] ^     x[ 3];
-    assign t9  = x[ 5] ^     x[13];
-    assign t10 = x[ 7] ^     t4   ;
-    assign t11 = t0    ^     t3   ;
-    assign t12 = x[14] ^     x[16];
-    assign t13 = x[17] ^     x[ 1];
-    assign t14 = x[17] ^     x[12];
-    assign t15 = x[ 4] ^     x[ 9];
-    assign t16 = x[ 7] ^     x[11];
-    assign t17 = x[ 8] ^     t2 ;
-    assign t18 = x[13] ^     t5 ;
-    assign t19 = t2   ^     t3 ;
-    assign t20 = t4   ^     t6 ;
-    assign t22 = t2   ^     t7 ;
-    assign t23 = t7   ^     t8 ;
-    assign t24 = t5   ^     t7 ;
-    assign t25 = t6   ^     t10;
-    assign t26 = t9   ^     t11;
-    assign t27 = t10  ^     t18;
-    assign t28 = t11  ^     t25;
-    assign t29 = t15  ^     t20;
+    t0  = x[ 2] ^     x[11];
+    t1  = x[ 8] ^     x[ 9];
+    t2  = x[ 4] ^     x[12];
+    t3  = x[15] ^     x[ 0];
+    t4  = x[16] ^     x[ 6];
+    t5  = x[14] ^     x[ 1];
+    t6  = x[17] ^     x[10];
+    t7  = t0    ^     t1   ;
+    t8  = x[ 0] ^     x[ 3];
+    t9  = x[ 5] ^     x[13];
+    t10 = x[ 7] ^     t4   ;
+    t11 = t0    ^     t3   ;
+    t12 = x[14] ^     x[16];
+    t13 = x[17] ^     x[ 1];
+    t14 = x[17] ^     x[12];
+    t15 = x[ 4] ^     x[ 9];
+    t16 = x[ 7] ^     x[11];
+    t17 = x[ 8] ^     t2 ;
+    t18 = x[13] ^     t5 ;
+    t19 = t2   ^     t3 ;
+    t20 = t4   ^     t6 ;
+    t22 = t2   ^     t7 ;
+    t23 = t7   ^     t8 ;
+    t24 = t5   ^     t7 ;
+    t25 = t6   ^     t10;
+    t26 = t9   ^     t11;
+    t27 = t10  ^     t18;
+    t28 = t11  ^     t25;
+    t29 = t15  ^     t20;
 
-    assign y[ 0] = t9  ^ t16;
-    assign y[ 1] = t14 ^ t23;
-    assign y[ 2] = t19 ^ t24;
-    assign y[ 3] = t23 ^ t27;
-    assign y[ 4] = t12 ^ t22;
-    assign y[ 5] = t17 ^ t28;
-    assign y[ 6] = t26 ^ t29;
-    assign y[ 7] = t13 ^ t22;
+    y[ 0] = t9  ^ t16;
+    y[ 1] = t14 ^ t23;
+    y[ 2] = t19 ^ t24;
+    y[ 3] = t23 ^ t27;
+    y[ 4] = t12 ^ t22;
+    y[ 5] = t17 ^ t28;
+    y[ 6] = t26 ^ t29;
+    y[ 7] = t13 ^ t22;
     return  y;
 endfunction
 
diff --git a/rtl/ibex_decoder.sv b/rtl/ibex_decoder.sv
index d712c5e3b3..50923625f4 100644
--- a/rtl/ibex_decoder.sv
+++ b/rtl/ibex_decoder.sv
@@ -418,16 +418,20 @@ module ibex_decoder #(
                 5'b0_1000: illegal_insn = (instr[26:25] == 2'b00) ? 1'b0 : 1'b1;       // srai
 
                 5'b0_0100: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1;          // sroi
-                5'b0_1100,                                                             // rori
+                5'b0_1100: illegal_insn = ((RV32B != RV32BNone)  ||
+                                           (RV32Zk!= RV32ZkNone)) ? 1'b0 : 1'b1;       // rori
                 5'b0_1001: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1;          // bexti
 
                 5'b0_1101: begin
                   if ((RV32B == RV32BFull)) begin
                     illegal_insn = 1'b0;                                               // grevi
-                  end else if (RV32B == RV32BBalanced) begin
-                    illegal_insn = (instr[24:20] == 5'b11000) ? 1'b0 : 1'b1;           // rev8
-                  end else begin
-                    illegal_insn = 1'b1;
+                  end else begin  // not RV32BFull: only rev8/brev8 can be legal
+                    unique case (instr[24:20])
+                      5'b11000: illegal_insn = ((RV32B  == RV32BBalanced) ||
+                                                (RV32Zk != RV32ZkNone   )) ? 1'b0 : 1'b1; // rev8
+                      5'b00111: illegal_insn = ( RV32Zk != RV32ZkNone    ) ? 1'b0 : 1'b1; // brev8
+                      default: illegal_insn = 1'b1;
+                    endcase
                   end
                 end
                 5'b0_0101: begin
@@ -521,11 +525,13 @@ module ibex_decoder #(
             {7'b001_0100, 3'b101}, // gorc
             {7'b000_0100, 3'b001}, // shfl
             {7'b000_0100, 3'b101}, // unshfl
-            {7'b001_0100, 3'b010}, // xperm.n
-            {7'b001_0100, 3'b100}, // xperm.b
-            {7'b001_0100, 3'b110}, // xperm.h
             {7'b001_0000, 3'b001}, // slo
             {7'b001_0000, 3'b101}, // sro
+            {7'b001_0100, 3'b110}: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // xperm.h
+            // RV32B zbp & RV32Zk zbk
+            {7'b001_0100, 3'b010}, // xperm.n/xperm4
+            {7'b001_0100, 3'b100}: illegal_insn = ((RV32B  == RV32BFull ) ||
+                                                   (RV32Zk != RV32ZkNone)) ? 1'b0 : 1'b1;// xperm.b/xperm8
             // RV32B zbc
             {7'b000_0101, 3'b001}, // clmul
             {7'b000_0101, 3'b011}: illegal_insn = ((RV32B  == RV32BFull ) ||
@@ -533,10 +539,6 @@ module ibex_decoder #(
             // RV32B zbc
             {7'b000_0101, 3'b010}: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1;     // clmulr
 
-            // RV32Zk zbk
-            {7'b001_0100, 3'b100}, // xperm8
-            {7'b001_0100, 3'b010}: illegal_insn = (RV32Zk != RV32ZkNone) ? 1'b0 : 1'b1; // xperm4
-
             // RV32Zk zkh
             {7'b010_1000, 3'b000}, // sha512_sum0r
             {7'b010_1001, 3'b000}, // sha512_sum1r
@@ -1084,7 +1086,7 @@ module ibex_decoder #(
             {7'b010_0100, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACKU;  // packu
             {7'b000_0100, 3'b111}: begin
               if      (RV32B  != RV32BNone ) alu_operator_o = ALU_PACKH;  // packh
-              else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_PACKH;  // andn
+              else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_PACKH;  // packh
             end
 
             {7'b010_0000, 3'b100}: begin
@@ -1118,13 +1120,21 @@ module ibex_decoder #(
             {7'b001_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GORC;    // gorc
             {7'b000_0100, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SHFL;    // shfl
             {7'b000_0100, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_UNSHFL;  // unshfl
-            {7'b001_0100, 3'b010}: if (RV32B == RV32BFull) alu_operator_o = ALU_XPERM_N; // xperm.n
-            {7'b001_0100, 3'b100}: if (RV32B == RV32BFull) alu_operator_o = ALU_XPERM_B; // xperm.b
-            {7'b001_0100, 3'b110}: if (RV32B == RV32BFull) alu_operator_o = ALU_XPERM_H; // xperm.h
             {7'b001_0000, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SLO;     // slo
             {7'b001_0000, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_SRO;     // sro
+            {7'b001_0100, 3'b110}: if (RV32B == RV32BFull) alu_operator_o = ALU_XPERM_H; // xperm.h
 
-            // RV32B zbc
+            // RV32B zbp & RV32K zkb
+            {7'b001_0100, 3'b010}: begin
+              if      (RV32B  == RV32BFull ) alu_operator_o = ALU_XPERM_N; // xperm.n
+              else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_XPERM4;  // xperm4
+            end
+            {7'b001_0100, 3'b100}: begin
+              if      (RV32B == RV32BFull  ) alu_operator_o = ALU_XPERM_B; // xperm.b
+              else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_XPERM8;  // xperm8
+            end
+
+            // RV32B zbc & RV32K zkb
             {7'b000_0101, 3'b001}: begin
               if      (RV32B  == RV32BFull ) alu_operator_o = ALU_CLMUL;  // clmul
               else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_CLMUL;  // clmul
@@ -1149,12 +1159,6 @@ module ibex_decoder #(
               end
             end
 
-            // RV32Zk zbk
-            // xperm8
-            {7'b001_0100, 3'b100}: if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_XPERM8;
-            // xperm4
-            {7'b001_0100, 3'b010}: if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_XPERM4;
-
             // RV32Zk zkh
             // sha512_sum0r
             {7'b010_1000, 3'b000}: if (RV32Zk == RV32Zkn) alu_operator_o = ZKN_SHA512SUM0R;
diff --git a/rtl/ibex_sm4_sbox.sv b/rtl/ibex_sm4_sbox.sv
index bd814f3d85..0d3d7ba6fa 100644
--- a/rtl/ibex_sm4_sbox.sv
+++ b/rtl/ibex_sm4_sbox.sv
@@ -27,34 +27,34 @@ function automatic logic [20:0] sm4_sbox_top(logic [7:0] x);
     logic  y9,  y8,  y7,  y6,  y5,  y4,  y3,  y2,  y1,  y0 ;
     logic  t6,  t5,  t4,  t3,  t2,  t1,  t0 ;
 
-    assign y18 = x[ 2] ^  x[ 6];
-    assign t0  = x[ 3] ^  x[ 4];
-    assign t1  = x[ 2] ^  x[ 7];
-    assign t2  = x[ 7] ^  y18  ;
-    assign t3  = x[ 1] ^  t1   ;
-    assign t4  = x[ 6] ^  x[ 7];
-    assign t5  = x[ 0] ^  y18  ;
-    assign t6  = x[ 3] ^  x[ 6];
-    assign y10 = x[ 1] ^  y18;
-    assign y0  = x[ 5] ^~ y10;
-    assign y1  = t0    ^  t3 ;
-    assign y2  = x[ 0] ^  t0 ;
-    assign y4  = x[ 0] ^  t3 ;
-    assign y3  = x[ 3] ^  y4 ;
-    assign y5  = x[ 5] ^  t5 ;
-    assign y6  = x[ 0] ^~ x[ 1];
-    assign y7  = t0    ^~ y10;
-    assign y8  = t0    ^  t5 ;
-    assign y9  = x[ 3];
-    assign y11 = t0    ^  t4 ;
-    assign y12 = x[ 5] ^  t4 ;
-    assign y13 = x[ 5] ^~ y1 ;
-    assign y14 = x[ 4] ^~ t2 ;
-    assign y15 = x[ 1] ^~ t6 ;
-    assign y16 = x[ 0] ^~ t2 ;
-    assign y17 = t0    ^~ t2 ;
-    assign y19 = x[ 5] ^~ y14;
-    assign y20 = x[ 0] ^  t1 ;
+    y18 = x[ 2] ^  x[ 6];
+    t0  = x[ 3] ^  x[ 4];
+    t1  = x[ 2] ^  x[ 7];
+    t2  = x[ 7] ^  y18  ;
+    t3  = x[ 1] ^  t1   ;
+    t4  = x[ 6] ^  x[ 7];
+    t5  = x[ 0] ^  y18  ;
+    t6  = x[ 3] ^  x[ 6];
+    y10 = x[ 1] ^  y18;
+    y0  = x[ 5] ^~ y10;
+    y1  = t0    ^  t3 ;
+    y2  = x[ 0] ^  t0 ;
+    y4  = x[ 0] ^  t3 ;
+    y3  = x[ 3] ^  y4 ;
+    y5  = x[ 5] ^  t5 ;
+    y6  = x[ 0] ^~ x[ 1];
+    y7  = t0    ^~ y10;
+    y8  = t0    ^  t5 ;
+    y9  = x[ 3];
+    y11 = t0    ^  t4 ;
+    y12 = x[ 5] ^  t4 ;
+    y13 = x[ 5] ^~ y1 ;
+    y14 = x[ 4] ^~ t2 ;
+    y15 = x[ 1] ^~ t6 ;
+    y16 = x[ 0] ^~ t2 ;
+    y17 = t0    ^~ t2 ;
+    y19 = x[ 5] ^~ y14;
+    y20 = x[ 0] ^  t1 ;
 
     return {y20, y19, y18, y17, y16, y15, y14, y13, y12, y11,
             y10, y9,  y8,  y7,  y6,  y5,  y4,  y3,  y2,  y1,  y0};
@@ -66,44 +66,44 @@ function automatic logic [7:0] sm4_sbox_out(logic [17:0] x);
     logic  t29, t28, t27, t26, t25, t24, t23, t22, t21, t20;
     logic  t19, t18, t17, t16, t15, t14, t13, t12, t11, t10;
     logic  t9,  t8,  t7,  t6,  t5,  t4,  t3,  t2,  t1,  t0 ;
-    assign t0   = x[ 4] ^  x[ 7];
-    assign t1   = x[13] ^  x[15];
-    assign t2   = x[ 2] ^  x[16];
-    assign t3   = x[ 6] ^  t0;
-    assign t4   = x[12] ^  t1;
-    assign t5   = x[ 9] ^  x[10];
-    assign t6   = x[11] ^  t2;
-    assign t7   = x[ 1] ^  t4;
-    assign t8   = x[ 0] ^  x[17];
-    assign t9   = x[ 3] ^  x[17];
-    assign t10  = x[ 8] ^  t3;
-    assign t11  = t2    ^  t5;
-    assign t12  = x[14] ^  t6;
-    assign t13  = t7    ^  t9;
-    assign t14  = x[ 0] ^  x[ 6];
-    assign t15  = x[ 7] ^  x[16];
-    assign t16  = x[ 5] ^  x[13];
-    assign t17  = x[ 3] ^  x[15];
-    assign t18  = x[10] ^  x[12];
-    assign t19  = x[ 9] ^  t1 ;
-    assign t20  = x[ 4] ^  t4 ;
-    assign t21  = x[14] ^  t3 ;
-    assign t22  = x[16] ^  t5 ;
-    assign t23  = t7    ^  t14;
-    assign t24  = t8    ^  t11;
-    assign t25  = t0    ^  t12;
-    assign t26  = t17   ^  t3 ;
-    assign t27  = t18   ^  t10;
-    assign t28  = t19   ^  t6 ;
-    assign t29  = t8    ^  t10;
-    assign y[0] = t11   ^~ t13;
-    assign y[1] = t15   ^~ t23;
-    assign y[2] = t20   ^  t24;
-    assign y[3] = t16   ^  t25;
-    assign y[4] = t26   ^~ t22;
-    assign y[5] = t21   ^  t13;
-    assign y[6] = t27   ^~ t12;
-    assign y[7] = t28   ^~ t29;
+    t0   = x[ 4] ^  x[ 7];
+    t1   = x[13] ^  x[15];
+    t2   = x[ 2] ^  x[16];
+    t3   = x[ 6] ^  t0;
+    t4   = x[12] ^  t1;
+    t5   = x[ 9] ^  x[10];
+    t6   = x[11] ^  t2;
+    t7   = x[ 1] ^  t4;
+    t8   = x[ 0] ^  x[17];
+    t9   = x[ 3] ^  x[17];
+    t10  = x[ 8] ^  t3;
+    t11  = t2    ^  t5;
+    t12  = x[14] ^  t6;
+    t13  = t7    ^  t9;
+    t14  = x[ 0] ^  x[ 6];
+    t15  = x[ 7] ^  x[16];
+    t16  = x[ 5] ^  x[13];
+    t17  = x[ 3] ^  x[15];
+    t18  = x[10] ^  x[12];
+    t19  = x[ 9] ^  t1 ;
+    t20  = x[ 4] ^  t4 ;
+    t21  = x[14] ^  t3 ;
+    t22  = x[16] ^  t5 ;
+    t23  = t7    ^  t14;
+    t24  = t8    ^  t11;
+    t25  = t0    ^  t12;
+    t26  = t17   ^  t3 ;
+    t27  = t18   ^  t10;
+    t28  = t19   ^  t6 ;
+    t29  = t8    ^  t10;
+    y[0] = t11   ^~ t13;
+    y[1] = t15   ^~ t23;
+    y[2] = t20   ^  t24;
+    y[3] = t16   ^  t25;
+    y[4] = t26   ^~ t22;
+    y[5] = t21   ^  t13;
+    y[6] = t27   ^~ t12;
+    y[7] = t28   ^~ t29;
 
     return  y;
 endfunction
@@ -117,70 +117,70 @@ function automatic logic [17:0] sm4_sbox_inv_mid(logic [20:0] x);
     logic  t29, t28, t27, t26, t25, t24, t23, t22, t21, t20;
     logic  t19, t18, t17, t16, t15, t14, t13, t12, t11, t10;
     logic  t9,  t8,  t7,  t6,  t5,  t4,  t3,  t2,  t1,  t0 ;
-    assign t0  = x[ 3] ^  x[12];
-    assign t1  = x[ 9] &  x[ 5];
-    assign t2  = x[17] &  x[ 6];
-    assign t3  = x[10] ^  t1   ;
-    assign t4  = x[14] &  x[ 0];
-    assign t5  = t4    ^  t1   ;
-    assign t6  = x[ 3] &  x[12];
-    assign t7  = x[16] &  x[ 7];
-    assign t8  = t0    ^  t6   ;
-    assign t9  = x[15] &  x[13];
-    assign t10 = t9    ^  t6   ;
-    assign t11 = x[ 1] &  x[11];
-    assign t12 = x[ 4] &  x[20];
-    assign t13 = t12   ^  t11  ;
-    assign t14 = x[ 2] &  x[ 8];
-    assign t15 = t14   ^  t11  ;
-    assign t16 = t3    ^  t2   ;
-    assign t17 = t5    ^  x[18];
-    assign t18 = t8    ^  t7   ;
-    assign t19 = t10   ^  t15  ;
-    assign t20 = t16   ^  t13  ;
-    assign t21 = t17   ^  t15  ;
-    assign t22 = t18   ^  t13  ;
-    assign t23 = t19   ^  x[19];
-    assign t24 = t22   ^  t23  ;
-    assign t25 = t22   &  t20  ;
-    assign t26 = t21   ^  t25  ;
-    assign t27 = t20   ^  t21  ;
-    assign t28 = t23   ^  t25  ;
-    assign t29 = t28   &  t27  ;
-    assign t30 = t26   &  t24  ;
-    assign t31 = t20   &  t23  ;
-    assign t32 = t27   &  t31  ;
-    assign t33 = t27   ^  t25  ;
-    assign t34 = t21   &  t22  ;
-    assign t35 = t24   &  t34  ;
-    assign t36 = t24   ^  t25  ;
-    assign t37 = t21   ^  t29  ;
-    assign t38 = t32   ^  t33  ;
-    assign t39 = t23   ^  t30  ;
-    assign t40 = t35   ^  t36  ;
-    assign t41 = t38   ^  t40  ;
-    assign t42 = t37   ^  t39  ;
-    assign t43 = t37   ^  t38  ;
-    assign t44 = t39   ^  t40  ;
-    assign t45 = t42   ^  t41  ;
-    assign y[ 0] = t38 &  x[ 7];
-    assign y[ 1] = t37 &  x[13];
-    assign y[ 2] = t42 &  x[11];
-    assign y[ 3] = t45 &  x[20];
-    assign y[ 4] = t41 &  x[ 8];
-    assign y[ 5] = t44 &  x[ 9];
-    assign y[ 6] = t40 &  x[17];
-    assign y[ 7] = t39 &  x[14];
-    assign y[ 8] = t43 &  x[ 3];
-    assign y[ 9] = t38 &  x[16];
-    assign y[10] = t37 &  x[15];
-    assign y[11] = t42 &  x[ 1];
-    assign y[12] = t45 &  x[ 4];
-    assign y[13] = t41 &  x[ 2];
-    assign y[14] = t44 &  x[ 5];
-    assign y[15] = t40 &  x[ 6];
-    assign y[16] = t39 &  x[ 0];
-    assign y[17] = t43 &  x[12];
+    t0  = x[ 3] ^  x[12];
+    t1  = x[ 9] &  x[ 5];
+    t2  = x[17] &  x[ 6];
+    t3  = x[10] ^  t1   ;
+    t4  = x[14] &  x[ 0];
+    t5  = t4    ^  t1   ;
+    t6  = x[ 3] &  x[12];
+    t7  = x[16] &  x[ 7];
+    t8  = t0    ^  t6   ;
+    t9  = x[15] &  x[13];
+    t10 = t9    ^  t6   ;
+    t11 = x[ 1] &  x[11];
+    t12 = x[ 4] &  x[20];
+    t13 = t12   ^  t11  ;
+    t14 = x[ 2] &  x[ 8];
+    t15 = t14   ^  t11  ;
+    t16 = t3    ^  t2   ;
+    t17 = t5    ^  x[18];
+    t18 = t8    ^  t7   ;
+    t19 = t10   ^  t15  ;
+    t20 = t16   ^  t13  ;
+    t21 = t17   ^  t15  ;
+    t22 = t18   ^  t13  ;
+    t23 = t19   ^  x[19];
+    t24 = t22   ^  t23  ;
+    t25 = t22   &  t20  ;
+    t26 = t21   ^  t25  ;
+    t27 = t20   ^  t21  ;
+    t28 = t23   ^  t25  ;
+    t29 = t28   &  t27  ;
+    t30 = t26   &  t24  ;
+    t31 = t20   &  t23  ;
+    t32 = t27   &  t31  ;
+    t33 = t27   ^  t25  ;
+    t34 = t21   &  t22  ;
+    t35 = t24   &  t34  ;
+    t36 = t24   ^  t25  ;
+    t37 = t21   ^  t29  ;
+    t38 = t32   ^  t33  ;
+    t39 = t23   ^  t30  ;
+    t40 = t35   ^  t36  ;
+    t41 = t38   ^  t40  ;
+    t42 = t37   ^  t39  ;
+    t43 = t37   ^  t38  ;
+    t44 = t39   ^  t40  ;
+    t45 = t42   ^  t41  ;
+    y[ 0] = t38 &  x[ 7];
+    y[ 1] = t37 &  x[13];
+    y[ 2] = t42 &  x[11];
+    y[ 3] = t45 &  x[20];
+    y[ 4] = t41 &  x[ 8];
+    y[ 5] = t44 &  x[ 9];
+    y[ 6] = t40 &  x[17];
+    y[ 7] = t39 &  x[14];
+    y[ 8] = t43 &  x[ 3];
+    y[ 9] = t38 &  x[16];
+    y[10] = t37 &  x[15];
+    y[11] = t42 &  x[ 1];
+    y[12] = t45 &  x[ 4];
+    y[13] = t41 &  x[ 2];
+    y[14] = t44 &  x[ 5];
+    y[15] = t40 &  x[ 6];
+    y[16] = t39 &  x[ 0];
+    y[17] = t43 &  x[12];
 
     return y;
 endfunction
diff --git a/rtl/ibex_zk.sv b/rtl/ibex_zk.sv
index e2919d57c1..e62fada301 100644
--- a/rtl/ibex_zk.sv
+++ b/rtl/ibex_zk.sv
@@ -25,24 +25,24 @@ module ibex_zk #(
 // 32-bit Barrel Right Rotation
 function automatic logic [31:0] ror32(logic [31:0] x, logic [4:0] amt);
     logic [31:0] ro, l8, l4, l2, l1, l0;
-    assign l0 = x;
-    assign l1 = ({32{amt[0]}} & {l0[   0], l0[31: 1]}) | ({32{!amt[0]}} & l0[31:0]);
-    assign l2 = ({32{amt[1]}} & {l1[ 1:0], l1[31: 2]}) | ({32{!amt[1]}} & l1[31:0]);
-    assign l4 = ({32{amt[2]}} & {l2[ 3:0], l2[31: 4]}) | ({32{!amt[2]}} & l2[31:0]);
-    assign l8 = ({32{amt[3]}} & {l4[ 7:0], l4[31: 8]}) | ({32{!amt[3]}} & l4[31:0]);
-    assign ro = ({32{amt[4]}} & {l8[15:0], l8[31:16]}) | ({32{!amt[4]}} & l8[31:0]);
+    l0 = x; // stage input; each stage below rotates by 2^k only when amt[k] is set
+    l1 = ({32{amt[0]}} & {l0[   0], l0[31: 1]}) | ({32{!amt[0]}} & l0[31:0]); // ror 1
+    l2 = ({32{amt[1]}} & {l1[ 1:0], l1[31: 2]}) | ({32{!amt[1]}} & l1[31:0]); // ror 2
+    l4 = ({32{amt[2]}} & {l2[ 3:0], l2[31: 4]}) | ({32{!amt[2]}} & l2[31:0]); // ror 4
+    l8 = ({32{amt[3]}} & {l4[ 7:0], l4[31: 8]}) | ({32{!amt[3]}} & l4[31:0]); // ror 8
+    ro = ({32{amt[4]}} & {l8[15:0], l8[31:16]}) | ({32{!amt[4]}} & l8[31:0]); // ror 16
     return ro;
 endfunction
 
 // 32-bit Barrel Left Rotation
 function automatic logic [31:0] rol32(logic [31:0] x, logic [4:0] amt);
     logic [31:0] ro, l8, l4, l2, l1, l0;
-    assign l0 = x;
-    assign l1 = ({32{amt[0]}} & {l0[30:0], l0[31   ]}) | ({32{!amt[0]}} & l0[31:0]);
-    assign l2 = ({32{amt[1]}} & {l1[29:0], l1[31:30]}) | ({32{!amt[1]}} & l1[31:0]);
-    assign l4 = ({32{amt[2]}} & {l2[27:0], l2[31:28]}) | ({32{!amt[2]}} & l2[31:0]);
-    assign l8 = ({32{amt[3]}} & {l4[23:0], l4[31:24]}) | ({32{!amt[3]}} & l4[31:0]);
-    assign ro = ({32{amt[4]}} & {l8[15:0], l8[31:16]}) | ({32{!amt[4]}} & l8[31:0]);
+    l0 = x; // stage input; each stage below rotates by 2^k only when amt[k] is set
+    l1 = ({32{amt[0]}} & {l0[30:0], l0[31   ]}) | ({32{!amt[0]}} & l0[31:0]); // rol 1
+    l2 = ({32{amt[1]}} & {l1[29:0], l1[31:30]}) | ({32{!amt[1]}} & l1[31:0]); // rol 2
+    l4 = ({32{amt[2]}} & {l2[27:0], l2[31:28]}) | ({32{!amt[2]}} & l2[31:0]); // rol 4
+    l8 = ({32{amt[3]}} & {l4[23:0], l4[31:24]}) | ({32{!amt[3]}} & l4[31:0]); // rol 8
+    ro = ({32{amt[4]}} & {l8[15:0], l8[31:16]}) | ({32{!amt[4]}} & l8[31:0]); // rol 16
     return ro;
 endfunction
 
@@ -50,7 +50,7 @@ endfunction
 function automatic logic [7:0] rev8(logic [7:0] x);
     logic [7:0]  rb;
     for (int i = 0;  i < 8; i = i + 1) begin
-        assign rb[i] = x[8-i-1];
+        rb[i] = x[8-i-1]; // output bit i is input bit 7-i (bit order reversed)
     end
     return rb;
 endfunction
@@ -59,8 +59,8 @@ endfunction
 function automatic logic [31:0] zip32(logic [31:0] x);
     logic [31:0] uz;
     for (int i = 0;  i < 16; i = i + 1) begin
-        assign uz[2*i  ] = x[i];
-        assign uz[2*i+1] = x[i+16];
+        uz[2*i  ] = x[i]; // even output bits come from the low half x[15:0]
+        uz[2*i+1] = x[i+16]; // odd output bits come from the high half x[31:16]
     end
     return uz;
 endfunction
@@ -69,8 +69,8 @@ endfunction
 function automatic logic [31:0] unzip32(logic [31:0] x);
     logic [15:0] zh, zl;
     for (int i = 0;  i < 16; i = i + 1) begin
-        assign zh[i] = x[2*i + 1];
-        assign zl[i] = x[2*i    ];
+        zh[i] = x[2*i + 1]; // odd input bits are gathered into the upper half
+        zl[i] = x[2*i    ]; // even input bits are gathered into the lower half
     end
     return {zh, zl};
 endfunction
@@ -78,19 +78,19 @@ endfunction
 
 // Multiply by 2 in GF(2^8) modulo 8'h1b
 function automatic logic [7:0] xtime2(logic [7:0] a);
-    logic [7:0] xtime2;
-    xtime2  = {a[6:0],1'b0} ^ (a[7] ? 8'h1b : 8'b0 );
-    return xtime2;
+    logic [7:0] x2; // result of a * 2
+    x2  = {a[6:0],1'b0} ^ (a[7] ? 8'h1b : 8'b0 ); // shift left, XOR 8'h1b if a[7] was set
+    return x2;
 endfunction
 
 // Paired down multiply by X in GF(2^8)
 function automatic logic [7:0] xtimeN(logic [7:0] a, logic [3:0] b);
-    logic [7:0] xtimeN;
-    xtimeN = (b[0] ?                      a   : 0) ^
-             (b[1] ? xtime2(              a)  : 0) ^
-             (b[2] ? xtime2(xtime2(       a)) : 0) ^
-             (b[3] ? xtime2(xtime2(xtime2(a))): 0) ;
-    return xtimeN;
+    logic [7:0] xn; // XOR of the terms selected by b[3:0]; kept 8 bits wide throughout
+    xn = (b[0] ?                      a   : 8'h00) ^ // b[0]: a
+         (b[1] ? xtime2(              a)  : 8'h00) ^ // b[1]: a doubled once
+         (b[2] ? xtime2(xtime2(       a)) : 8'h00) ^ // b[2]: a doubled twice
+         (b[3] ? xtime2(xtime2(xtime2(a))): 8'h00) ; // b[3]: a doubled three times
+    return xn;
 endfunction
 
   logic        zkb_val;

From 704eaf39bbfc85cf4d82625bf2df10bf2329b007 Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Wed, 8 Dec 2021 10:15:37 +0000
Subject: [PATCH 20/21] ZK: fix a line-length style violation

---
 rtl/ibex_decoder.sv | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/rtl/ibex_decoder.sv b/rtl/ibex_decoder.sv
index 50923625f4..6e07d9ffc0 100644
--- a/rtl/ibex_decoder.sv
+++ b/rtl/ibex_decoder.sv
@@ -528,8 +528,8 @@ module ibex_decoder #(
             {7'b001_0100, 3'b110}: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // xperm.h
             // RV32B zbp & RV32Zk zbk
             {7'b001_0100, 3'b010}, // xperm.n/xperm4
-            {7'b001_0100, 3'b100}: illegal_insn = ((RV32B  == RV32BFull ) ||
-                                                   (RV32Zk != RV32ZkNone)) ? 1'b0 : 1'b1;// xperm.b/xperm8
+            {7'b001_0100, 3'b100}: illegal_insn = ((RV32B  == RV32BFull ) ||   // xperm.b/xperm8
+                                                   (RV32Zk != RV32ZkNone)) ? 1'b0 : 1'b1;
             // RV32B zbc
             {7'b000_0101, 3'b001}, // clmul
             {7'b000_0101, 3'b011}: illegal_insn = ((RV32B  == RV32BFull ) ||

From 47f2dc98047b9833b985b3ed8373c62f10e60b40 Mon Sep 17 00:00:00 2001
From: phthinh <hung3@e.ntu.edu.sg>
Date: Tue, 4 Jan 2022 13:47:45 +0000
Subject: [PATCH 21/21] fix lint errors

---
 rtl/ibex_decoder.sv | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/rtl/ibex_decoder.sv b/rtl/ibex_decoder.sv
index ffc6cd7efc..67a278df87 100644
--- a/rtl/ibex_decoder.sv
+++ b/rtl/ibex_decoder.sv
@@ -535,18 +535,16 @@ module ibex_decoder #(
                                                    (RV32Zk != RV32ZkNone)) ? 1'b0 : 1'b1;
             // RV32B zbc
             {7'b000_0101, 3'b001}, // clmul
-            {7'b000_0101, 3'b011}: illegal_insn = ((RV32B  == RV32BFull ) || 
+            {7'b000_0101, 3'b011}: illegal_insn = ((RV32B  == RV32BFull ) ||
                                                    (RV32B == RV32BOTEarlGrey) ||
                                                    (RV32Zk != RV32ZkNone)) ? 1'b0 : 1'b1; // clmulh
             // RV32B zbc
             {7'b000_0101, 3'b010}: begin // clmulr
               illegal_insn = (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) ? 1'b0 : 1'b1;
             end
-            
             // RV32B zbe
             {7'b010_0100, 3'b110}, // bdecompress
             {7'b000_0100, 3'b110}: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // bcompress
-            
             // RV32Zk zkh
             {7'b010_1000, 3'b000}, // sha512_sum0r
             {7'b010_1001, 3'b000}, // sha512_sum1r
@@ -1157,7 +1155,6 @@ module ibex_decoder #(
             {7'b001_0100, 3'b110}: begin
               if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_XPERM_H;
             end
-              
             // RV32B zbp & RV32K zkb
             {7'b001_0100, 3'b010}: begin
               if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_XPERM_N;
@@ -1167,7 +1164,6 @@ module ibex_decoder #(
               if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_XPERM_B;
               else if (RV32Zk != RV32ZkNone) alu_operator_o = ZKB_XPERM8;  // xperm8
             end
-
             // RV32B zbc & RV32K zkb
             {7'b000_0101, 3'b001}: begin
               if (RV32B == RV32BOTEarlGrey || RV32B == RV32BFull) alu_operator_o = ALU_CLMUL;